From: Stephen Rothwell Date: Mon, 7 Nov 2011 02:27:35 +0000 (+1100) Subject: Merge remote-tracking branch 'moduleh/for-sfr' X-Git-Tag: next-20111107~3 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=bb063136806b43ea90f746c16e531b72ed5de139;p=karo-tx-linux.git Merge remote-tracking branch 'moduleh/for-sfr' Conflicts: drivers/media/dvb/frontends/dibx000_common.c drivers/media/video/mt9m111.c drivers/media/video/ov6650.c drivers/mfd/ab3550-core.c include/linux/dmaengine.h --- bb063136806b43ea90f746c16e531b72ed5de139 diff --cc arch/arm/mach-omap2/voltage.c index 64070ac1e761,e964cfd3a3d0..1f8fdf736e63 --- a/arch/arm/mach-omap2/voltage.c +++ b/arch/arm/mach-omap2/voltage.c @@@ -21,10 -21,11 +21,11 @@@ #include #include -#include #include + #include #include #include +#include #include diff --cc arch/arm/plat-samsung/dev-backlight.c index 2adbeaed4c04,a976c023b286..e657305644cc --- a/arch/arm/plat-samsung/dev-backlight.c +++ b/arch/arm/plat-samsung/dev-backlight.c @@@ -12,9 -12,9 +12,10 @@@ #include #include + #include #include #include +#include #include #include diff --cc arch/arm/plat-samsung/dma-ops.c index 6e3d9abc9e2e,000000000000..93a994a5dd8f mode 100644,000000..100644 --- a/arch/arm/plat-samsung/dma-ops.c +++ b/arch/arm/plat-samsung/dma-ops.c @@@ -1,131 -1,0 +1,132 @@@ +/* linux/arch/arm/plat-samsung/dma-ops.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Samsung DMA Operations + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include ++#include + +#include + +static inline bool pl330_filter(struct dma_chan *chan, void *param) +{ + struct dma_pl330_peri *peri = chan->private; + return peri->peri_id == (unsigned)param; +} + +static unsigned samsung_dmadev_request(enum dma_ch dma_ch, + struct samsung_dma_info *info) +{ + struct dma_chan *chan; + dma_cap_mask_t mask; + struct dma_slave_config slave_config; + + dma_cap_zero(mask); + dma_cap_set(info->cap, mask); + + chan = dma_request_channel(mask, pl330_filter, (void *)dma_ch); + + if (info->direction == DMA_FROM_DEVICE) { + memset(&slave_config, 0, sizeof(struct dma_slave_config)); + slave_config.direction = info->direction; + slave_config.src_addr = info->fifo; + slave_config.src_addr_width = info->width; + slave_config.src_maxburst = 1; + dmaengine_slave_config(chan, &slave_config); + } else if (info->direction == DMA_TO_DEVICE) { + memset(&slave_config, 0, sizeof(struct dma_slave_config)); + slave_config.direction = info->direction; + slave_config.dst_addr = info->fifo; + slave_config.dst_addr_width = info->width; + slave_config.dst_maxburst = 1; + dmaengine_slave_config(chan, &slave_config); + } + + return (unsigned)chan; +} + +static int samsung_dmadev_release(unsigned ch, + struct s3c2410_dma_client *client) +{ + dma_release_channel((struct dma_chan *)ch); + + return 0; +} + +static int samsung_dmadev_prepare(unsigned ch, + struct samsung_dma_prep_info *info) +{ + struct scatterlist sg; + struct dma_chan *chan = (struct dma_chan *)ch; + struct dma_async_tx_descriptor *desc; + + switch (info->cap) { + case DMA_SLAVE: + sg_init_table(&sg, 1); + sg_dma_len(&sg) = info->len; + sg_set_page(&sg, pfn_to_page(PFN_DOWN(info->buf)), + info->len, offset_in_page(info->buf)); + sg_dma_address(&sg) = info->buf; + + desc = chan->device->device_prep_slave_sg(chan, + &sg, 1, info->direction, DMA_PREP_INTERRUPT); + break; + case DMA_CYCLIC: + desc = chan->device->device_prep_dma_cyclic(chan, + info->buf, info->len, info->period, info->direction); + break; + default: + dev_err(&chan->dev->device, "unsupported format\n"); + return -EFAULT; + } + + if (!desc) { + dev_err(&chan->dev->device, "cannot prepare cyclic dma\n"); + return -EFAULT; + } + + desc->callback = info->fp; + desc->callback_param = info->fp_param; + + dmaengine_submit((struct dma_async_tx_descriptor *)desc); + + return 0; +} + +static inline int samsung_dmadev_trigger(unsigned ch) +{ + dma_async_issue_pending((struct dma_chan *)ch); + + return 0; +} + +static inline int samsung_dmadev_flush(unsigned ch) +{ + return dmaengine_terminate_all((struct dma_chan *)ch); +} + +struct samsung_dma_ops dmadev_ops = { + .request = samsung_dmadev_request, + .release = samsung_dmadev_release, + .prepare = samsung_dmadev_prepare, + .trigger = samsung_dmadev_trigger, + .started = NULL, + .flush = samsung_dmadev_flush, + .stop = samsung_dmadev_flush, +}; + +void *samsung_dmadev_get_ops(void) +{ + return &dmadev_ops; +} +EXPORT_SYMBOL(samsung_dmadev_get_ops); diff --cc arch/arm/plat-samsung/s3c-dma-ops.c index 582333c70585,000000000000..781494912827 mode 100644,000000..100644 --- a/arch/arm/plat-samsung/s3c-dma-ops.c +++ b/arch/arm/plat-samsung/s3c-dma-ops.c @@@ -1,130 -1,0 +1,131 @@@ +/* linux/arch/arm/plat-samsung/s3c-dma-ops.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Samsung S3C-DMA Operations + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include ++#include + +#include + +struct cb_data { + void (*fp) (void *); + void *fp_param; + unsigned ch; + struct list_head node; +}; + +static LIST_HEAD(dma_list); + +static void s3c_dma_cb(struct s3c2410_dma_chan *channel, void *param, + int size, enum s3c2410_dma_buffresult res) +{ + struct cb_data *data = param; + + data->fp(data->fp_param); +} + +static unsigned s3c_dma_request(enum dma_ch dma_ch, + struct samsung_dma_info *info) +{ + struct cb_data *data; + + if (s3c2410_dma_request(dma_ch, info->client, NULL) < 0) { + s3c2410_dma_free(dma_ch, info->client); + return 0; + } + + data = kzalloc(sizeof(struct cb_data), GFP_KERNEL); + data->ch = dma_ch; + list_add_tail(&data->node, &dma_list); + + s3c2410_dma_devconfig(dma_ch, info->direction, info->fifo); + + if (info->cap == DMA_CYCLIC) + s3c2410_dma_setflags(dma_ch, S3C2410_DMAF_CIRCULAR); + + s3c2410_dma_config(dma_ch, info->width); + + return (unsigned)dma_ch; +} + +static int s3c_dma_release(unsigned ch, struct s3c2410_dma_client *client) +{ + struct cb_data *data; + + list_for_each_entry(data, &dma_list, node) + if (data->ch == ch) + break; + list_del(&data->node); + + s3c2410_dma_free(ch, client); + kfree(data); + + return 0; +} + +static int s3c_dma_prepare(unsigned ch, struct samsung_dma_prep_info *info) +{ + struct cb_data *data; + int len = (info->cap == DMA_CYCLIC) ? info->period : info->len; + + list_for_each_entry(data, &dma_list, node) + if (data->ch == ch) + break; + + if (!data->fp) { + s3c2410_dma_set_buffdone_fn(ch, s3c_dma_cb); + data->fp = info->fp; + data->fp_param = info->fp_param; + } + + s3c2410_dma_enqueue(ch, (void *)data, info->buf, len); + + return 0; +} + +static inline int s3c_dma_trigger(unsigned ch) +{ + return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_START); +} + +static inline int s3c_dma_started(unsigned ch) +{ + return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_STARTED); +} + +static inline int s3c_dma_flush(unsigned ch) +{ + return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_FLUSH); +} + +static inline int s3c_dma_stop(unsigned ch) +{ + return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_STOP); +} + +static struct samsung_dma_ops s3c_dma_ops = { + .request = s3c_dma_request, + .release = s3c_dma_release, + .prepare = s3c_dma_prepare, + .trigger = s3c_dma_trigger, + .started = s3c_dma_started, + .flush = s3c_dma_flush, + .stop = s3c_dma_stop, +}; + +void *s3c_dma_get_ops(void) +{ + return &s3c_dma_ops; +} +EXPORT_SYMBOL(s3c_dma_get_ops); diff --cc arch/microblaze/kernel/dma.c index dc6416d265d6,b159b8a847d6..65a4af4cbbbe --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@@ -10,7 -10,9 +10,8 @@@ #include #include #include + #include #include -#include /* * Generic direct DMA implementation diff --cc arch/sh/kernel/topology.c index ecc2d3d0f54a,ab37955b453a..4649a6ff0cfe --- a/arch/sh/kernel/topology.c +++ b/arch/sh/kernel/topology.c @@@ -11,9 -11,9 +11,10 @@@ #include #include #include +#include #include #include + #include static DEFINE_PER_CPU(struct cpu, cpu_devices); diff --cc arch/x86/crypto/aes_glue.c index b0b6950cc8c8,bdce3eeeaa37..8efcf42a9d7e --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@@ -3,8 -3,8 +3,9 @@@ * */ + #include #include +#include asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); diff --cc arch/x86/kernel/cpu/mcheck/mce.c index 864830e1dd65,537c89e00095..362056aefeb4 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@@ -36,7 -36,9 +36,8 @@@ #include #include #include -#include #include + #include #include #include diff --cc drivers/infiniband/core/verbs.c index 42517500b223,a8923ffc6459..602b1bd723a9 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@@ -38,8 -38,8 +38,9 @@@ #include #include + #include #include +#include #include #include diff --cc drivers/md/dm-bufio.c index cb246667dd52,000000000000..0a6806f80ab5 mode 100644,000000..100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@@ -1,1699 -1,0 +1,1700 @@@ +/* + * Copyright (C) 2009-2011 Red Hat, Inc. + * + * Author: Mikulas Patocka + * + * This file is released under the GPL. + */ + +#include "dm-bufio.h" + +#include +#include +#include +#include +#include +#include ++#include + +#define DM_MSG_PREFIX "bufio" + +/* + * Memory management policy: + * Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory + * or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower). + * Always allocate at least DM_BUFIO_MIN_BUFFERS buffers. + * Start background writeback when there are DM_BUFIO_WRITEBACK_PERCENT + * dirty buffers. + */ +#define DM_BUFIO_MIN_BUFFERS 8 + +#define DM_BUFIO_MEMORY_PERCENT 2 +#define DM_BUFIO_VMALLOC_PERCENT 25 +#define DM_BUFIO_WRITEBACK_PERCENT 75 + +/* + * Check buffer ages in this interval (seconds) + */ +#define DM_BUFIO_WORK_TIMER_SECS 10 + +/* + * Free buffers when they are older than this (seconds) + */ +#define DM_BUFIO_DEFAULT_AGE_SECS 60 + +/* + * The number of bvec entries that are embedded directly in the buffer. + * If the chunk size is larger, dm-io is used to do the io. + */ +#define DM_BUFIO_INLINE_VECS 16 + +/* + * Buffer hash + */ +#define DM_BUFIO_HASH_BITS 20 +#define DM_BUFIO_HASH(block) \ + ((((block) >> DM_BUFIO_HASH_BITS) ^ (block)) & \ + ((1 << DM_BUFIO_HASH_BITS) - 1)) + +/* + * Don't try to use kmem_cache_alloc for blocks larger than this. + * For explanation, see alloc_buffer_data below. + */ +#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1) +#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1)) + +/* + * dm_buffer->list_mode + */ +#define LIST_CLEAN 0 +#define LIST_DIRTY 1 +#define LIST_SIZE 2 + +/* + * Linking of buffers: + * All buffers are linked to cache_hash with their hash_list field. + * + * Clean buffers that are not being written (B_WRITING not set) + * are linked to lru[LIST_CLEAN] with their lru_list field. + * + * Dirty and clean buffers that are being written are linked to + * lru[LIST_DIRTY] with their lru_list field. When the write + * finishes, the buffer cannot be relinked immediately (because we + * are in an interrupt context and relinking requires process + * context), so some clean-not-writing buffers can be held on + * dirty_lru too. They are later added to lru in the process + * context. + */ +struct dm_bufio_client { + struct mutex lock; + + struct list_head lru[LIST_SIZE]; + unsigned long n_buffers[LIST_SIZE]; + + struct block_device *bdev; + unsigned block_size; + unsigned char sectors_per_block_bits; + unsigned char pages_per_block_bits; + unsigned char blocks_per_page_bits; + unsigned aux_size; + void (*alloc_callback)(struct dm_buffer *); + void (*write_callback)(struct dm_buffer *); + + struct dm_io_client *dm_io; + + struct list_head reserved_buffers; + unsigned need_reserved_buffers; + + struct hlist_head *cache_hash; + wait_queue_head_t free_buffer_wait; + + int async_write_error; + + struct list_head client_list; + struct shrinker shrinker; +}; + +/* + * Buffer state bits. + */ +#define B_READING 0 +#define B_WRITING 1 +#define B_DIRTY 2 + +/* + * Describes how the block was allocated: + * kmem_cache_alloc(), __get_free_pages() or vmalloc(). + * See the comment at alloc_buffer_data. + */ +enum data_mode { + DATA_MODE_SLAB = 0, + DATA_MODE_GET_FREE_PAGES = 1, + DATA_MODE_VMALLOC = 2, + DATA_MODE_LIMIT = 3 +}; + +struct dm_buffer { + struct hlist_node hash_list; + struct list_head lru_list; + sector_t block; + void *data; + enum data_mode data_mode; + unsigned char list_mode; /* LIST_* */ + unsigned hold_count; + int read_error; + int write_error; + unsigned long state; + unsigned long last_accessed; + struct dm_bufio_client *c; + struct bio bio; + struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS]; +}; + +/*----------------------------------------------------------------*/ + +static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT]; +static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT]; + +static inline int dm_bufio_cache_index(struct dm_bufio_client *c) +{ + unsigned ret = c->blocks_per_page_bits - 1; + + BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches)); + + return ret; +} + +#define DM_BUFIO_CACHE(c) (dm_bufio_caches[dm_bufio_cache_index(c)]) +#define DM_BUFIO_CACHE_NAME(c) (dm_bufio_cache_names[dm_bufio_cache_index(c)]) + +#define dm_bufio_in_request() (!!current->bio_list) + +static void dm_bufio_lock(struct dm_bufio_client *c) +{ + mutex_lock_nested(&c->lock, dm_bufio_in_request()); +} + +static int dm_bufio_trylock(struct dm_bufio_client *c) +{ + return mutex_trylock(&c->lock); +} + +static void dm_bufio_unlock(struct dm_bufio_client *c) +{ + mutex_unlock(&c->lock); +} + +/* + * FIXME Move to sched.h? + */ +#ifdef CONFIG_PREEMPT_VOLUNTARY +# define dm_bufio_cond_resched() \ +do { \ + if (unlikely(need_resched())) \ + _cond_resched(); \ +} while (0) +#else +# define dm_bufio_cond_resched() do { } while (0) +#endif + +/*----------------------------------------------------------------*/ + +/* + * Default cache size: available memory divided by the ratio. + */ +static unsigned long dm_bufio_default_cache_size; + +/* + * Total cache size set by the user. + */ +static unsigned long dm_bufio_cache_size; + +/* + * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change + * at any time. If it disagrees, the user has changed cache size. + */ +static unsigned long dm_bufio_cache_size_latch; + +static DEFINE_SPINLOCK(param_spinlock); + +/* + * Buffers are freed after this timeout + */ +static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; + +static unsigned long dm_bufio_peak_allocated; +static unsigned long dm_bufio_allocated_kmem_cache; +static unsigned long dm_bufio_allocated_get_free_pages; +static unsigned long dm_bufio_allocated_vmalloc; +static unsigned long dm_bufio_current_allocated; + +/*----------------------------------------------------------------*/ + +/* + * Per-client cache: dm_bufio_cache_size / dm_bufio_client_count + */ +static unsigned long dm_bufio_cache_size_per_client; + +/* + * The current number of clients. + */ +static int dm_bufio_client_count; + +/* + * The list of all clients. + */ +static LIST_HEAD(dm_bufio_all_clients); + +/* + * This mutex protects dm_bufio_cache_size_latch, + * dm_bufio_cache_size_per_client and dm_bufio_client_count + */ +static DEFINE_MUTEX(dm_bufio_clients_lock); + +/*----------------------------------------------------------------*/ + +static void adjust_total_allocated(enum data_mode data_mode, long diff) +{ + static unsigned long * const class_ptr[DATA_MODE_LIMIT] = { + &dm_bufio_allocated_kmem_cache, + &dm_bufio_allocated_get_free_pages, + &dm_bufio_allocated_vmalloc, + }; + + spin_lock(¶m_spinlock); + + *class_ptr[data_mode] += diff; + + dm_bufio_current_allocated += diff; + + if (dm_bufio_current_allocated > dm_bufio_peak_allocated) + dm_bufio_peak_allocated = dm_bufio_current_allocated; + + spin_unlock(¶m_spinlock); +} + +/* + * Change the number of clients and recalculate per-client limit. + */ +static void __cache_size_refresh(void) +{ + BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock)); + BUG_ON(dm_bufio_client_count < 0); + + dm_bufio_cache_size_latch = dm_bufio_cache_size; + + barrier(); + + /* + * Use default if set to 0 and report the actual cache size used. + */ + if (!dm_bufio_cache_size_latch) { + (void)cmpxchg(&dm_bufio_cache_size, 0, + dm_bufio_default_cache_size); + dm_bufio_cache_size_latch = dm_bufio_default_cache_size; + } + + dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch / + (dm_bufio_client_count ? : 1); +} + +/* + * Allocating buffer data. + * + * Small buffers are allocated with kmem_cache, to use space optimally. + * + * For large buffers, we choose between get_free_pages and vmalloc. + * Each has advantages and disadvantages. + * + * __get_free_pages can randomly fail if the memory is fragmented. + * __vmalloc won't randomly fail, but vmalloc space is limited (it may be + * as low as 128M) so using it for caching is not appropriate. + * + * If the allocation may fail we use __get_free_pages. Memory fragmentation + * won't have a fatal effect here, but it just causes flushes of some other + * buffers and more I/O will be performed. Don't use __get_free_pages if it + * always fails (i.e. order >= MAX_ORDER). + * + * If the allocation shouldn't fail we use __vmalloc. This is only for the + * initial reserve allocation, so there's no risk of wasting all vmalloc + * space. + */ +static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, + enum data_mode *data_mode) +{ + if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) { + *data_mode = DATA_MODE_SLAB; + return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask); + } + + if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT && + gfp_mask & __GFP_NORETRY) { + *data_mode = DATA_MODE_GET_FREE_PAGES; + return (void *)__get_free_pages(gfp_mask, + c->pages_per_block_bits); + } + + *data_mode = DATA_MODE_VMALLOC; + return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); +} + +/* + * Free buffer's data. + */ +static void free_buffer_data(struct dm_bufio_client *c, + void *data, enum data_mode data_mode) +{ + switch (data_mode) { + case DATA_MODE_SLAB: + kmem_cache_free(DM_BUFIO_CACHE(c), data); + break; + + case DATA_MODE_GET_FREE_PAGES: + free_pages((unsigned long)data, c->pages_per_block_bits); + break; + + case DATA_MODE_VMALLOC: + vfree(data); + break; + + default: + DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d", + data_mode); + BUG(); + } +} + +/* + * Allocate buffer and its data. + */ +static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask) +{ + struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size, + gfp_mask); + + if (!b) + return NULL; + + b->c = c; + + b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode); + if (!b->data) { + kfree(b); + return NULL; + } + + adjust_total_allocated(b->data_mode, (long)c->block_size); + + return b; +} + +/* + * Free buffer and its data. + */ +static void free_buffer(struct dm_buffer *b) +{ + struct dm_bufio_client *c = b->c; + + adjust_total_allocated(b->data_mode, -(long)c->block_size); + + free_buffer_data(c, b->data, b->data_mode); + kfree(b); +} + +/* + * Link buffer to the hash list and clean or dirty queue. + */ +static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) +{ + struct dm_bufio_client *c = b->c; + + c->n_buffers[dirty]++; + b->block = block; + b->list_mode = dirty; + list_add(&b->lru_list, &c->lru[dirty]); + hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]); + b->last_accessed = jiffies; +} + +/* + * Unlink buffer from the hash list and dirty or clean queue. + */ +static void __unlink_buffer(struct dm_buffer *b) +{ + struct dm_bufio_client *c = b->c; + + BUG_ON(!c->n_buffers[b->list_mode]); + + c->n_buffers[b->list_mode]--; + hlist_del(&b->hash_list); + list_del(&b->lru_list); +} + +/* + * Place the buffer to the head of dirty or clean LRU queue. + */ +static void __relink_lru(struct dm_buffer *b, int dirty) +{ + struct dm_bufio_client *c = b->c; + + BUG_ON(!c->n_buffers[b->list_mode]); + + c->n_buffers[b->list_mode]--; + c->n_buffers[dirty]++; + b->list_mode = dirty; + list_del(&b->lru_list); + list_add(&b->lru_list, &c->lru[dirty]); +} + +/*---------------------------------------------------------------- + * Submit I/O on the buffer. + * + * Bio interface is faster but it has some problems: + * the vector list is limited (increasing this limit increases + * memory-consumption per buffer, so it is not viable); + * + * the memory must be direct-mapped, not vmalloced; + * + * the I/O driver can reject requests spuriously if it thinks that + * the requests are too big for the device or if they cross a + * controller-defined memory boundary. + * + * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and + * it is not vmalloced, try using the bio interface. + * + * If the buffer is big, if it is vmalloced or if the underlying device + * rejects the bio because it is too large, use dm-io layer to do the I/O. + * The dm-io layer splits the I/O into multiple requests, avoiding the above + * shortcomings. + *--------------------------------------------------------------*/ + +/* + * dm-io completion routine. It just calls b->bio.bi_end_io, pretending + * that the request was handled directly with bio interface. + */ +static void dmio_complete(unsigned long error, void *context) +{ + struct dm_buffer *b = context; + + b->bio.bi_end_io(&b->bio, error ? -EIO : 0); +} + +static void use_dmio(struct dm_buffer *b, int rw, sector_t block, + bio_end_io_t *end_io) +{ + int r; + struct dm_io_request io_req = { + .bi_rw = rw, + .notify.fn = dmio_complete, + .notify.context = b, + .client = b->c->dm_io, + }; + struct dm_io_region region = { + .bdev = b->c->bdev, + .sector = block << b->c->sectors_per_block_bits, + .count = b->c->block_size >> SECTOR_SHIFT, + }; + + if (b->data_mode != DATA_MODE_VMALLOC) { + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = b->data; + } else { + io_req.mem.type = DM_IO_VMA; + io_req.mem.ptr.vma = b->data; + } + + b->bio.bi_end_io = end_io; + + r = dm_io(&io_req, 1, ®ion, NULL); + if (r) + end_io(&b->bio, r); +} + +static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, + bio_end_io_t *end_io) +{ + char *ptr; + int len; + + bio_init(&b->bio); + b->bio.bi_io_vec = b->bio_vec; + b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; + b->bio.bi_sector = block << b->c->sectors_per_block_bits; + b->bio.bi_bdev = b->c->bdev; + b->bio.bi_end_io = end_io; + + /* + * We assume that if len >= PAGE_SIZE ptr is page-aligned. + * If len < PAGE_SIZE the buffer doesn't cross page boundary. + */ + ptr = b->data; + len = b->c->block_size; + + if (len >= PAGE_SIZE) + BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1)); + else + BUG_ON((unsigned long)ptr & (len - 1)); + + do { + if (!bio_add_page(&b->bio, virt_to_page(ptr), + len < PAGE_SIZE ? len : PAGE_SIZE, + virt_to_phys(ptr) & (PAGE_SIZE - 1))) { + BUG_ON(b->c->block_size <= PAGE_SIZE); + use_dmio(b, rw, block, end_io); + return; + } + + len -= PAGE_SIZE; + ptr += PAGE_SIZE; + } while (len > 0); + + submit_bio(rw, &b->bio); +} + +static void submit_io(struct dm_buffer *b, int rw, sector_t block, + bio_end_io_t *end_io) +{ + if (rw == WRITE && b->c->write_callback) + b->c->write_callback(b); + + if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE && + b->data_mode != DATA_MODE_VMALLOC) + use_inline_bio(b, rw, block, end_io); + else + use_dmio(b, rw, block, end_io); +} + +/*---------------------------------------------------------------- + * Writing dirty buffers + *--------------------------------------------------------------*/ + +/* + * The endio routine for write. + * + * Set the error, clear B_WRITING bit and wake anyone who was waiting on + * it. + */ +static void write_endio(struct bio *bio, int error) +{ + struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); + + b->write_error = error; + if (error) { + struct dm_bufio_client *c = b->c; + (void)cmpxchg(&c->async_write_error, 0, error); + } + + BUG_ON(!test_bit(B_WRITING, &b->state)); + + smp_mb__before_clear_bit(); + clear_bit(B_WRITING, &b->state); + smp_mb__after_clear_bit(); + + wake_up_bit(&b->state, B_WRITING); +} + +/* + * This function is called when wait_on_bit is actually waiting. + */ +static int do_io_schedule(void *word) +{ + io_schedule(); + + return 0; +} + +/* + * Initiate a write on a dirty buffer, but don't wait for it. + * + * - If the buffer is not dirty, exit. + * - If there some previous write going on, wait for it to finish (we can't + * have two writes on the same buffer simultaneously). + * - Submit our write and don't wait on it. We set B_WRITING indicating + * that there is a write in progress. + */ +static void __write_dirty_buffer(struct dm_buffer *b) +{ + if (!test_bit(B_DIRTY, &b->state)) + return; + + clear_bit(B_DIRTY, &b->state); + wait_on_bit_lock(&b->state, B_WRITING, + do_io_schedule, TASK_UNINTERRUPTIBLE); + + submit_io(b, WRITE, b->block, write_endio); +} + +/* + * Wait until any activity on the buffer finishes. Possibly write the + * buffer if it is dirty. When this function finishes, there is no I/O + * running on the buffer and the buffer is not dirty. + */ +static void __make_buffer_clean(struct dm_buffer *b) +{ + BUG_ON(b->hold_count); + + if (!b->state) /* fast case */ + return; + + wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE); + __write_dirty_buffer(b); + wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE); +} + +/* + * Find some buffer that is not held by anybody, clean it, unlink it and + * return it. + */ +static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c) +{ + struct dm_buffer *b; + + list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) { + BUG_ON(test_bit(B_WRITING, &b->state)); + BUG_ON(test_bit(B_DIRTY, &b->state)); + + if (!b->hold_count) { + __make_buffer_clean(b); + __unlink_buffer(b); + return b; + } + dm_bufio_cond_resched(); + } + + list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) { + BUG_ON(test_bit(B_READING, &b->state)); + + if (!b->hold_count) { + __make_buffer_clean(b); + __unlink_buffer(b); + return b; + } + dm_bufio_cond_resched(); + } + + return NULL; +} + +/* + * Wait until some other threads free some buffer or release hold count on + * some buffer. + * + * This function is entered with c->lock held, drops it and regains it + * before exiting. + */ +static void __wait_for_free_buffer(struct dm_bufio_client *c) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&c->free_buffer_wait, &wait); + set_task_state(current, TASK_UNINTERRUPTIBLE); + dm_bufio_unlock(c); + + io_schedule(); + + set_task_state(current, TASK_RUNNING); + remove_wait_queue(&c->free_buffer_wait, &wait); + + dm_bufio_lock(c); +} + +/* + * Allocate a new buffer. If the allocation is not possible, wait until + * some other thread frees a buffer. + * + * May drop the lock and regain it. + */ +static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c) +{ + struct dm_buffer *b; + + /* + * dm-bufio is resistant to allocation failures (it just keeps + * one buffer reserved in cases all the allocations fail). + * So set flags to not try too hard: + * GFP_NOIO: don't recurse into the I/O layer + * __GFP_NORETRY: don't retry and rather return failure + * __GFP_NOMEMALLOC: don't use emergency reserves + * __GFP_NOWARN: don't print a warning in case of failure + * + * For debugging, if we set the cache size to 1, no new buffers will + * be allocated. + */ + while (1) { + if (dm_bufio_cache_size_latch != 1) { + b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (b) + return b; + } + + if (!list_empty(&c->reserved_buffers)) { + b = list_entry(c->reserved_buffers.next, + struct dm_buffer, lru_list); + list_del(&b->lru_list); + c->need_reserved_buffers++; + + return b; + } + + b = __get_unclaimed_buffer(c); + if (b) + return b; + + __wait_for_free_buffer(c); + } +} + +static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c) +{ + struct dm_buffer *b = __alloc_buffer_wait_no_callback(c); + + if (c->alloc_callback) + c->alloc_callback(b); + + return b; +} + +/* + * Free a buffer and wake other threads waiting for free buffers. + */ +static void __free_buffer_wake(struct dm_buffer *b) +{ + struct dm_bufio_client *c = b->c; + + if (!c->need_reserved_buffers) + free_buffer(b); + else { + list_add(&b->lru_list, &c->reserved_buffers); + c->need_reserved_buffers--; + } + + wake_up(&c->free_buffer_wait); +} + +static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait) +{ + struct dm_buffer *b, *tmp; + + list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) { + BUG_ON(test_bit(B_READING, &b->state)); + + if (!test_bit(B_DIRTY, &b->state) && + !test_bit(B_WRITING, &b->state)) { + __relink_lru(b, LIST_CLEAN); + continue; + } + + if (no_wait && test_bit(B_WRITING, &b->state)) + return; + + __write_dirty_buffer(b); + dm_bufio_cond_resched(); + } +} + +/* + * Get writeback threshold and buffer limit for a given client. + */ +static void __get_memory_limit(struct dm_bufio_client *c, + unsigned long *threshold_buffers, + unsigned long *limit_buffers) +{ + unsigned long buffers; + + if (dm_bufio_cache_size != dm_bufio_cache_size_latch) { + mutex_lock(&dm_bufio_clients_lock); + __cache_size_refresh(); + mutex_unlock(&dm_bufio_clients_lock); + } + + buffers = dm_bufio_cache_size_per_client >> + (c->sectors_per_block_bits + SECTOR_SHIFT); + + if (buffers < DM_BUFIO_MIN_BUFFERS) + buffers = DM_BUFIO_MIN_BUFFERS; + + *limit_buffers = buffers; + *threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100; +} + +/* + * Check if we're over watermark. + * If we are over threshold_buffers, start freeing buffers. + * If we're over "limit_buffers", block until we get under the limit. + */ +static void __check_watermark(struct dm_bufio_client *c) +{ + unsigned long threshold_buffers, limit_buffers; + + __get_memory_limit(c, &threshold_buffers, &limit_buffers); + + while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] > + limit_buffers) { + + struct dm_buffer *b = __get_unclaimed_buffer(c); + + if (!b) + return; + + __free_buffer_wake(b); + dm_bufio_cond_resched(); + } + + if (c->n_buffers[LIST_DIRTY] > threshold_buffers) + __write_dirty_buffers_async(c, 1); +} + +/* + * Find a buffer in the hash. + */ +static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) +{ + struct dm_buffer *b; + struct hlist_node *hn; + + hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], + hash_list) { + dm_bufio_cond_resched(); + if (b->block == block) + return b; + } + + return NULL; +} + +/*---------------------------------------------------------------- + * Getting a buffer + *--------------------------------------------------------------*/ + +enum new_flag { + NF_FRESH = 0, + NF_READ = 1, + NF_GET = 2 +}; + +static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, + enum new_flag nf, struct dm_buffer **bp, + int *need_submit) +{ + struct dm_buffer *b, *new_b = NULL; + + *need_submit = 0; + + b = __find(c, block); + if (b) { + b->hold_count++; + __relink_lru(b, test_bit(B_DIRTY, &b->state) || + test_bit(B_WRITING, &b->state)); + return b; + } + + if (nf == NF_GET) + return NULL; + + new_b = __alloc_buffer_wait(c); + + /* + * We've had a period where the mutex was unlocked, so need to + * recheck the hash table. + */ + b = __find(c, block); + if (b) { + __free_buffer_wake(new_b); + b->hold_count++; + __relink_lru(b, test_bit(B_DIRTY, &b->state) || + test_bit(B_WRITING, &b->state)); + return b; + } + + __check_watermark(c); + + b = new_b; + b->hold_count = 1; + b->read_error = 0; + b->write_error = 0; + __link_buffer(b, block, LIST_CLEAN); + + if (nf == NF_FRESH) { + b->state = 0; + return b; + } + + b->state = 1 << B_READING; + *need_submit = 1; + + return b; +} + +/* + * The endio routine for reading: set the error, clear the bit and wake up + * anyone waiting on the buffer. + */ +static void read_endio(struct bio *bio, int error) +{ + struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); + + b->read_error = error; + + BUG_ON(!test_bit(B_READING, &b->state)); + + smp_mb__before_clear_bit(); + clear_bit(B_READING, &b->state); + smp_mb__after_clear_bit(); + + wake_up_bit(&b->state, B_READING); +} + +/* + * A common routine for dm_bufio_new and dm_bufio_read. Operation of these + * functions is similar except that dm_bufio_new doesn't read the + * buffer from the disk (assuming that the caller overwrites all the data + * and uses dm_bufio_mark_buffer_dirty to write new data back). + */ +static void *new_read(struct dm_bufio_client *c, sector_t block, + enum new_flag nf, struct dm_buffer **bp) +{ + int need_submit; + struct dm_buffer *b; + + dm_bufio_lock(c); + b = __bufio_new(c, block, nf, bp, &need_submit); + dm_bufio_unlock(c); + + if (!b || IS_ERR(b)) + return b; + + if (need_submit) + submit_io(b, READ, b->block, read_endio); + + wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE); + + if (b->read_error) { + int error = b->read_error; + + dm_bufio_release(b); + + return ERR_PTR(error); + } + + *bp = b; + + return b->data; +} + +void *dm_bufio_get(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp) +{ + return new_read(c, block, NF_GET, bp); +} +EXPORT_SYMBOL_GPL(dm_bufio_get); + +void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp) +{ + BUG_ON(dm_bufio_in_request()); + + return new_read(c, block, NF_READ, bp); +} +EXPORT_SYMBOL_GPL(dm_bufio_read); + +void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp) +{ + BUG_ON(dm_bufio_in_request()); + + return new_read(c, block, NF_FRESH, bp); +} +EXPORT_SYMBOL_GPL(dm_bufio_new); + +void dm_bufio_release(struct dm_buffer *b) +{ + struct dm_bufio_client *c = b->c; + + dm_bufio_lock(c); + + BUG_ON(test_bit(B_READING, &b->state)); + BUG_ON(!b->hold_count); + + b->hold_count--; + if (!b->hold_count) { + wake_up(&c->free_buffer_wait); + + /* + * If there were errors on the buffer, and the buffer is not + * to be written, free the buffer. There is no point in caching + * invalid buffer. + */ + if ((b->read_error || b->write_error) && + !test_bit(B_WRITING, &b->state) && + !test_bit(B_DIRTY, &b->state)) { + __unlink_buffer(b); + __free_buffer_wake(b); + } + } + + dm_bufio_unlock(c); +} +EXPORT_SYMBOL_GPL(dm_bufio_release); + +void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) +{ + struct dm_bufio_client *c = b->c; + + dm_bufio_lock(c); + + if (!test_and_set_bit(B_DIRTY, &b->state)) + __relink_lru(b, LIST_DIRTY); + + dm_bufio_unlock(c); +} +EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); + +void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c) +{ + BUG_ON(dm_bufio_in_request()); + + dm_bufio_lock(c); + __write_dirty_buffers_async(c, 0); + dm_bufio_unlock(c); +} +EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async); + +/* + * For performance, it is essential that the buffers are written asynchronously + * and simultaneously (so that the block layer can merge the writes) and then + * waited upon. + * + * Finally, we flush hardware disk cache. + */ +int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c) +{ + int a, f; + unsigned long buffers_processed = 0; + struct dm_buffer *b, *tmp; + + dm_bufio_lock(c); + __write_dirty_buffers_async(c, 0); + +again: + list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) { + int dropped_lock = 0; + + if (buffers_processed < c->n_buffers[LIST_DIRTY]) + buffers_processed++; + + BUG_ON(test_bit(B_READING, &b->state)); + + if (test_bit(B_WRITING, &b->state)) { + if (buffers_processed < c->n_buffers[LIST_DIRTY]) { + dropped_lock = 1; + b->hold_count++; + dm_bufio_unlock(c); + wait_on_bit(&b->state, B_WRITING, + do_io_schedule, + TASK_UNINTERRUPTIBLE); + dm_bufio_lock(c); + b->hold_count--; + } else + wait_on_bit(&b->state, B_WRITING, + do_io_schedule, + TASK_UNINTERRUPTIBLE); + } + + if (!test_bit(B_DIRTY, &b->state) && + !test_bit(B_WRITING, &b->state)) + __relink_lru(b, LIST_CLEAN); + + dm_bufio_cond_resched(); + + /* + * If we dropped the lock, the list is no longer consistent, + * so we must restart the search. + * + * In the most common case, the buffer just processed is + * relinked to the clean list, so we won't loop scanning the + * same buffer again and again. + * + * This may livelock if there is another thread simultaneously + * dirtying buffers, so we count the number of buffers walked + * and if it exceeds the total number of buffers, it means that + * someone is doing some writes simultaneously with us. In + * this case, stop, dropping the lock. + */ + if (dropped_lock) + goto again; + } + wake_up(&c->free_buffer_wait); + dm_bufio_unlock(c); + + a = xchg(&c->async_write_error, 0); + f = dm_bufio_issue_flush(c); + if (a) + return a; + + return f; +} +EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); + +/* + * Use dm-io to send and empty barrier flush the device. + */ +int dm_bufio_issue_flush(struct dm_bufio_client *c) +{ + struct dm_io_request io_req = { + .bi_rw = REQ_FLUSH, + .mem.type = DM_IO_KMEM, + .mem.ptr.addr = NULL, + .client = c->dm_io, + }; + struct dm_io_region io_reg = { + .bdev = c->bdev, + .sector = 0, + .count = 0, + }; + + BUG_ON(dm_bufio_in_request()); + + return dm_io(&io_req, 1, &io_reg, NULL); +} +EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); + +/* + * We first delete any other buffer that may be at that new location. + * + * Then, we write the buffer to the original location if it was dirty. + * + * Then, if we are the only one who is holding the buffer, relink the buffer + * in the hash queue for the new location. + * + * If there was someone else holding the buffer, we write it to the new + * location but not relink it, because that other user needs to have the buffer + * at the same place. + */ +void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block) +{ + struct dm_bufio_client *c = b->c; + struct dm_buffer *new; + + BUG_ON(dm_bufio_in_request()); + + dm_bufio_lock(c); + +retry: + new = __find(c, new_block); + if (new) { + if (new->hold_count) { + __wait_for_free_buffer(c); + goto retry; + } + + /* + * FIXME: Is there any point waiting for a write that's going + * to be overwritten in a bit? + */ + __make_buffer_clean(new); + __unlink_buffer(new); + __free_buffer_wake(new); + } + + BUG_ON(!b->hold_count); + BUG_ON(test_bit(B_READING, &b->state)); + + __write_dirty_buffer(b); + if (b->hold_count == 1) { + wait_on_bit(&b->state, B_WRITING, + do_io_schedule, TASK_UNINTERRUPTIBLE); + set_bit(B_DIRTY, &b->state); + __unlink_buffer(b); + __link_buffer(b, new_block, LIST_DIRTY); + } else { + sector_t old_block; + wait_on_bit_lock(&b->state, B_WRITING, + do_io_schedule, TASK_UNINTERRUPTIBLE); + /* + * Relink buffer to "new_block" so that write_callback + * sees "new_block" as a block number. + * After the write, link the buffer back to old_block. + * All this must be done in bufio lock, so that block number + * change isn't visible to other threads. + */ + old_block = b->block; + __unlink_buffer(b); + __link_buffer(b, new_block, b->list_mode); + submit_io(b, WRITE, new_block, write_endio); + wait_on_bit(&b->state, B_WRITING, + do_io_schedule, TASK_UNINTERRUPTIBLE); + __unlink_buffer(b); + __link_buffer(b, old_block, b->list_mode); + } + + dm_bufio_unlock(c); + dm_bufio_release(b); +} +EXPORT_SYMBOL_GPL(dm_bufio_release_move); + +unsigned dm_bufio_get_block_size(struct dm_bufio_client *c) +{ + return c->block_size; +} +EXPORT_SYMBOL_GPL(dm_bufio_get_block_size); + +sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) +{ + return i_size_read(c->bdev->bd_inode) >> + (SECTOR_SHIFT + c->sectors_per_block_bits); +} +EXPORT_SYMBOL_GPL(dm_bufio_get_device_size); + +sector_t dm_bufio_get_block_number(struct dm_buffer *b) +{ + return b->block; +} +EXPORT_SYMBOL_GPL(dm_bufio_get_block_number); + +void *dm_bufio_get_block_data(struct dm_buffer *b) +{ + return b->data; +} +EXPORT_SYMBOL_GPL(dm_bufio_get_block_data); + +void *dm_bufio_get_aux_data(struct dm_buffer *b) +{ + return b + 1; +} +EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data); + +struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b) +{ + return b->c; +} +EXPORT_SYMBOL_GPL(dm_bufio_get_client); + +static void drop_buffers(struct dm_bufio_client *c) +{ + struct dm_buffer *b; + int i; + + BUG_ON(dm_bufio_in_request()); + + /* + * An optimization so that the buffers are not written one-by-one. + */ + dm_bufio_write_dirty_buffers_async(c); + + dm_bufio_lock(c); + + while ((b = __get_unclaimed_buffer(c))) + __free_buffer_wake(b); + + for (i = 0; i < LIST_SIZE; i++) + list_for_each_entry(b, &c->lru[i], lru_list) + DMERR("leaked buffer %llx, hold count %u, list %d", + (unsigned long long)b->block, b->hold_count, i); + + for (i = 0; i < LIST_SIZE; i++) + BUG_ON(!list_empty(&c->lru[i])); + + dm_bufio_unlock(c); +} + +/* + * Test if the buffer is unused and too old, and commit it. + * At if noio is set, we must not do any I/O because we hold + * dm_bufio_clients_lock and we would risk deadlock if the I/O gets rerouted to + * different bufio client. + */ +static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp, + unsigned long max_jiffies) +{ + if (jiffies - b->last_accessed < max_jiffies) + return 1; + + if (!(gfp & __GFP_IO)) { + if (test_bit(B_READING, &b->state) || + test_bit(B_WRITING, &b->state) || + test_bit(B_DIRTY, &b->state)) + return 1; + } + + if (b->hold_count) + return 1; + + __make_buffer_clean(b); + __unlink_buffer(b); + __free_buffer_wake(b); + + return 0; +} + +static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, + struct shrink_control *sc) +{ + int l; + struct dm_buffer *b, *tmp; + + for (l = 0; l < LIST_SIZE; l++) { + list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) + if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) && + !--nr_to_scan) + return; + dm_bufio_cond_resched(); + } +} + +static int shrink(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct dm_bufio_client *c = + container_of(shrinker, struct dm_bufio_client, shrinker); + unsigned long r; + unsigned long nr_to_scan = sc->nr_to_scan; + + if (sc->gfp_mask & __GFP_IO) + dm_bufio_lock(c); + else if (!dm_bufio_trylock(c)) + return !nr_to_scan ? 0 : -1; + + if (nr_to_scan) + __scan(c, nr_to_scan, sc); + + r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; + if (r > INT_MAX) + r = INT_MAX; + + dm_bufio_unlock(c); + + return r; +} + +/* + * Create the buffering interface + */ +struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size, + unsigned reserved_buffers, unsigned aux_size, + void (*alloc_callback)(struct dm_buffer *), + void (*write_callback)(struct dm_buffer *)) +{ + int r; + struct dm_bufio_client *c; + unsigned i; + + BUG_ON(block_size < 1 << SECTOR_SHIFT || + (block_size & (block_size - 1))); + + c = kmalloc(sizeof(*c), GFP_KERNEL); + if (!c) { + r = -ENOMEM; + goto bad_client; + } + c->cache_hash = vmalloc(sizeof(struct hlist_head) << DM_BUFIO_HASH_BITS); + if (!c->cache_hash) { + r = -ENOMEM; + goto bad_hash; + } + + c->bdev = bdev; + c->block_size = block_size; + c->sectors_per_block_bits = ffs(block_size) - 1 - SECTOR_SHIFT; + c->pages_per_block_bits = (ffs(block_size) - 1 >= PAGE_SHIFT) ? + ffs(block_size) - 1 - PAGE_SHIFT : 0; + c->blocks_per_page_bits = (ffs(block_size) - 1 < PAGE_SHIFT ? + PAGE_SHIFT - (ffs(block_size) - 1) : 0); + + c->aux_size = aux_size; + c->alloc_callback = alloc_callback; + c->write_callback = write_callback; + + for (i = 0; i < LIST_SIZE; i++) { + INIT_LIST_HEAD(&c->lru[i]); + c->n_buffers[i] = 0; + } + + for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++) + INIT_HLIST_HEAD(&c->cache_hash[i]); + + mutex_init(&c->lock); + INIT_LIST_HEAD(&c->reserved_buffers); + c->need_reserved_buffers = reserved_buffers; + + init_waitqueue_head(&c->free_buffer_wait); + c->async_write_error = 0; + + c->dm_io = dm_io_client_create(); + if (IS_ERR(c->dm_io)) { + r = PTR_ERR(c->dm_io); + goto bad_dm_io; + } + + mutex_lock(&dm_bufio_clients_lock); + if (c->blocks_per_page_bits) { + if (!DM_BUFIO_CACHE_NAME(c)) { + DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size); + if (!DM_BUFIO_CACHE_NAME(c)) { + r = -ENOMEM; + mutex_unlock(&dm_bufio_clients_lock); + goto bad_cache; + } + } + + if (!DM_BUFIO_CACHE(c)) { + DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c), + c->block_size, + c->block_size, 0, NULL); + if (!DM_BUFIO_CACHE(c)) { + r = -ENOMEM; + mutex_unlock(&dm_bufio_clients_lock); + goto bad_cache; + } + } + } + mutex_unlock(&dm_bufio_clients_lock); + + while (c->need_reserved_buffers) { + struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL); + + if (!b) { + r = -ENOMEM; + goto bad_buffer; + } + __free_buffer_wake(b); + } + + mutex_lock(&dm_bufio_clients_lock); + dm_bufio_client_count++; + list_add(&c->client_list, &dm_bufio_all_clients); + __cache_size_refresh(); + mutex_unlock(&dm_bufio_clients_lock); + + c->shrinker.shrink = shrink; + c->shrinker.seeks = 1; + c->shrinker.batch = 0; + register_shrinker(&c->shrinker); + + return c; + +bad_buffer: +bad_cache: + while (!list_empty(&c->reserved_buffers)) { + struct dm_buffer *b = list_entry(c->reserved_buffers.next, + struct dm_buffer, lru_list); + list_del(&b->lru_list); + free_buffer(b); + } + dm_io_client_destroy(c->dm_io); +bad_dm_io: + vfree(c->cache_hash); +bad_hash: + kfree(c); +bad_client: + return ERR_PTR(r); +} +EXPORT_SYMBOL_GPL(dm_bufio_client_create); + +/* + * Free the buffering interface. + * It is required that there are no references on any buffers. + */ +void dm_bufio_client_destroy(struct dm_bufio_client *c) +{ + unsigned i; + + drop_buffers(c); + + unregister_shrinker(&c->shrinker); + + mutex_lock(&dm_bufio_clients_lock); + + list_del(&c->client_list); + dm_bufio_client_count--; + __cache_size_refresh(); + + mutex_unlock(&dm_bufio_clients_lock); + + for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++) + BUG_ON(!hlist_empty(&c->cache_hash[i])); + + BUG_ON(c->need_reserved_buffers); + + while (!list_empty(&c->reserved_buffers)) { + struct dm_buffer *b = list_entry(c->reserved_buffers.next, + struct dm_buffer, lru_list); + list_del(&b->lru_list); + free_buffer(b); + } + + for (i = 0; i < LIST_SIZE; i++) + if (c->n_buffers[i]) + DMERR("leaked buffer count %d: %ld", i, c->n_buffers[i]); + + for (i = 0; i < LIST_SIZE; i++) + BUG_ON(c->n_buffers[i]); + + dm_io_client_destroy(c->dm_io); + vfree(c->cache_hash); + kfree(c); +} +EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); + +static void cleanup_old_buffers(void) +{ + unsigned long max_age = dm_bufio_max_age; + struct dm_bufio_client *c; + + barrier(); + + if (max_age > ULONG_MAX / HZ) + max_age = ULONG_MAX / HZ; + + mutex_lock(&dm_bufio_clients_lock); + list_for_each_entry(c, &dm_bufio_all_clients, client_list) { + if (!dm_bufio_trylock(c)) + continue; + + while (!list_empty(&c->lru[LIST_CLEAN])) { + struct dm_buffer *b; + b = list_entry(c->lru[LIST_CLEAN].prev, + struct dm_buffer, lru_list); + if (__cleanup_old_buffer(b, 0, max_age * HZ)) + break; + dm_bufio_cond_resched(); + } + + dm_bufio_unlock(c); + dm_bufio_cond_resched(); + } + mutex_unlock(&dm_bufio_clients_lock); +} + +static struct workqueue_struct *dm_bufio_wq; +static struct delayed_work dm_bufio_work; + +static void work_fn(struct work_struct *w) +{ + cleanup_old_buffers(); + + queue_delayed_work(dm_bufio_wq, &dm_bufio_work, + DM_BUFIO_WORK_TIMER_SECS * HZ); +} + +/*---------------------------------------------------------------- + * Module setup + *--------------------------------------------------------------*/ + +/* + * This is called only once for the whole dm_bufio module. + * It initializes memory limit. + */ +static int __init dm_bufio_init(void) +{ + __u64 mem; + + memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches); + memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names); + + mem = (__u64)((totalram_pages - totalhigh_pages) * + DM_BUFIO_MEMORY_PERCENT / 100) << PAGE_SHIFT; + + if (mem > ULONG_MAX) + mem = ULONG_MAX; + +#ifdef CONFIG_MMU + /* + * Get the size of vmalloc space the same way as VMALLOC_TOTAL + * in fs/proc/internal.h + */ + if (mem > (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100) + mem = (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100; +#endif + + dm_bufio_default_cache_size = mem; + + mutex_lock(&dm_bufio_clients_lock); + __cache_size_refresh(); + mutex_unlock(&dm_bufio_clients_lock); + + dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache"); + if (!dm_bufio_wq) + return -ENOMEM; + + INIT_DELAYED_WORK(&dm_bufio_work, work_fn); + queue_delayed_work(dm_bufio_wq, &dm_bufio_work, + DM_BUFIO_WORK_TIMER_SECS * HZ); + + return 0; +} + +/* + * This is called once when unloading the dm_bufio module. + */ +static void __exit dm_bufio_exit(void) +{ + int bug = 0; + int i; + + cancel_delayed_work_sync(&dm_bufio_work); + destroy_workqueue(dm_bufio_wq); + + for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++) { + struct kmem_cache *kc = dm_bufio_caches[i]; + + if (kc) + kmem_cache_destroy(kc); + } + + for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++) + kfree(dm_bufio_cache_names[i]); + + if (dm_bufio_client_count) { + DMCRIT("%s: dm_bufio_client_count leaked: %d", + __func__, dm_bufio_client_count); + bug = 1; + } + + if (dm_bufio_current_allocated) { + DMCRIT("%s: dm_bufio_current_allocated leaked: %lu", + __func__, dm_bufio_current_allocated); + bug = 1; + } + + if (dm_bufio_allocated_get_free_pages) { + DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu", + __func__, dm_bufio_allocated_get_free_pages); + bug = 1; + } + + if (dm_bufio_allocated_vmalloc) { + DMCRIT("%s: dm_bufio_vmalloc leaked: %lu", + __func__, dm_bufio_allocated_vmalloc); + bug = 1; + } + + if (bug) + BUG(); +} + +module_init(dm_bufio_init) +module_exit(dm_bufio_exit) + +module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache"); + +module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); + +module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory"); + +module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO); +MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc"); + +module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO); +MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages"); + +module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO); +MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc"); + +module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO); +MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache"); + +MODULE_AUTHOR("Mikulas Patocka "); +MODULE_DESCRIPTION(DM_NAME " buffered I/O library"); +MODULE_LICENSE("GPL"); diff --cc drivers/md/persistent-data/dm-btree-remove.c index 65fd85ec6514,000000000000..023fbc2d389e mode 100644,000000..100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@@ -1,566 -1,0 +1,566 @@@ +/* + * Copyright (C) 2011 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-btree.h" +#include "dm-btree-internal.h" +#include "dm-transaction-manager.h" + - #include ++#include + +/* + * Removing an entry from a btree + * ============================== + * + * A very important constraint for our btree is that no node, except the + * root, may have fewer than a certain number of entries. + * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES). + * + * Ensuring this is complicated by the way we want to only ever hold the + * locks on 2 nodes concurrently, and only change nodes in a top to bottom + * fashion. + * + * Each node may have a left or right sibling. When decending the spine, + * if a node contains only MIN_ENTRIES then we try and increase this to at + * least MIN_ENTRIES + 1. We do this in the following ways: + * + * [A] No siblings => this can only happen if the node is the root, in which + * case we copy the childs contents over the root. + * + * [B] No left sibling + * ==> rebalance(node, right sibling) + * + * [C] No right sibling + * ==> rebalance(left sibling, node) + * + * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD + * ==> delete node adding it's contents to left and right + * + * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD + * ==> rebalance(left, node, right) + * + * After these operations it's possible that the our original node no + * longer contains the desired sub tree. For this reason this rebalancing + * is performed on the children of the current node. This also avoids + * having a special case for the root. + * + * Once this rebalancing has occurred we can then step into the child node + * for internal nodes. Or delete the entry for leaf nodes. + */ + +/* + * Some little utilities for moving node data around. + */ +static void node_shift(struct node *n, int shift) +{ + uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); + uint32_t value_size = le32_to_cpu(n->header.value_size); + + if (shift < 0) { + shift = -shift; + BUG_ON(shift > nr_entries); + BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift, value_size)); + memmove(key_ptr(n, 0), + key_ptr(n, shift), + (nr_entries - shift) * sizeof(__le64)); + memmove(value_ptr(n, 0, value_size), + value_ptr(n, shift, value_size), + (nr_entries - shift) * value_size); + } else { + BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries)); + memmove(key_ptr(n, shift), + key_ptr(n, 0), + nr_entries * sizeof(__le64)); + memmove(value_ptr(n, shift, value_size), + value_ptr(n, 0, value_size), + nr_entries * value_size); + } +} + +static void node_copy(struct node *left, struct node *right, int shift) +{ + uint32_t nr_left = le32_to_cpu(left->header.nr_entries); + uint32_t value_size = le32_to_cpu(left->header.value_size); + BUG_ON(value_size != le32_to_cpu(right->header.value_size)); + + if (shift < 0) { + shift = -shift; + BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries)); + memcpy(key_ptr(left, nr_left), + key_ptr(right, 0), + shift * sizeof(__le64)); + memcpy(value_ptr(left, nr_left, value_size), + value_ptr(right, 0, value_size), + shift * value_size); + } else { + BUG_ON(shift > le32_to_cpu(right->header.max_entries)); + memcpy(key_ptr(right, 0), + key_ptr(left, nr_left - shift), + shift * sizeof(__le64)); + memcpy(value_ptr(right, 0, value_size), + value_ptr(left, nr_left - shift, value_size), + shift * value_size); + } +} + +/* + * Delete a specific entry from a leaf node. + */ +static void delete_at(struct node *n, unsigned index) +{ + unsigned nr_entries = le32_to_cpu(n->header.nr_entries); + unsigned nr_to_copy = nr_entries - (index + 1); + uint32_t value_size = le32_to_cpu(n->header.value_size); + BUG_ON(index >= nr_entries); + + if (nr_to_copy) { + memmove(key_ptr(n, index), + key_ptr(n, index + 1), + nr_to_copy * sizeof(__le64)); + + memmove(value_ptr(n, index, value_size), + value_ptr(n, index + 1, value_size), + nr_to_copy * value_size); + } + + n->header.nr_entries = cpu_to_le32(nr_entries - 1); +} + +static unsigned del_threshold(struct node *n) +{ + return le32_to_cpu(n->header.max_entries) / 3; +} + +static unsigned merge_threshold(struct node *n) +{ + /* + * The extra one is because we know we're potentially going to + * delete an entry. + */ + return 2 * (le32_to_cpu(n->header.max_entries) / 3) + 1; +} + +struct child { + unsigned index; + struct dm_block *block; + struct node *n; +}; + +static struct dm_btree_value_type le64_type = { + .context = NULL, + .size = sizeof(__le64), + .inc = NULL, + .dec = NULL, + .equal = NULL +}; + +static int init_child(struct dm_btree_info *info, struct node *parent, + unsigned index, struct child *result) +{ + int r, inc; + dm_block_t root; + + result->index = index; + root = value64(parent, index); + + r = dm_tm_shadow_block(info->tm, root, &btree_node_validator, + &result->block, &inc); + if (r) + return r; + + result->n = dm_block_data(result->block); + + if (inc) + inc_children(info->tm, result->n, &le64_type); + + *((__le64 *) value_ptr(parent, index, sizeof(__le64))) = + cpu_to_le64(dm_block_location(result->block)); + + return 0; +} + +static int exit_child(struct dm_btree_info *info, struct child *c) +{ + return dm_tm_unlock(info->tm, c->block); +} + +static void shift(struct node *left, struct node *right, int count) +{ + if (!count) + return; + + if (count > 0) { + node_shift(right, count); + node_copy(left, right, count); + } else { + node_copy(left, right, count); + node_shift(right, count); + } + + left->header.nr_entries = + cpu_to_le32(le32_to_cpu(left->header.nr_entries) - count); + BUG_ON(le32_to_cpu(left->header.nr_entries) > le32_to_cpu(left->header.max_entries)); + + right->header.nr_entries = + cpu_to_le32(le32_to_cpu(right->header.nr_entries) + count); + BUG_ON(le32_to_cpu(right->header.nr_entries) > le32_to_cpu(right->header.max_entries)); +} + +static void __rebalance2(struct dm_btree_info *info, struct node *parent, + struct child *l, struct child *r) +{ + struct node *left = l->n; + struct node *right = r->n; + uint32_t nr_left = le32_to_cpu(left->header.nr_entries); + uint32_t nr_right = le32_to_cpu(right->header.nr_entries); + + if (nr_left + nr_right <= merge_threshold(left)) { + /* + * Merge + */ + node_copy(left, right, -nr_right); + left->header.nr_entries = cpu_to_le32(nr_left + nr_right); + delete_at(parent, r->index); + + /* + * We need to decrement the right block, but not it's + * children, since they're still referenced by left. + */ + dm_tm_dec(info->tm, dm_block_location(r->block)); + } else { + /* + * Rebalance. + */ + unsigned target_left = (nr_left + nr_right) / 2; + unsigned shift_ = nr_left - target_left; + BUG_ON(le32_to_cpu(left->header.max_entries) <= nr_left - shift_); + BUG_ON(le32_to_cpu(right->header.max_entries) <= nr_right + shift_); + shift(left, right, nr_left - target_left); + *key_ptr(parent, r->index) = right->keys[0]; + } +} + +static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, + unsigned left_index) +{ + int r; + struct node *parent; + struct child left, right; + + parent = dm_block_data(shadow_current(s)); + + r = init_child(info, parent, left_index, &left); + if (r) + return r; + + r = init_child(info, parent, left_index + 1, &right); + if (r) { + exit_child(info, &left); + return r; + } + + __rebalance2(info, parent, &left, &right); + + r = exit_child(info, &left); + if (r) { + exit_child(info, &right); + return r; + } + + return exit_child(info, &right); +} + +static void __rebalance3(struct dm_btree_info *info, struct node *parent, + struct child *l, struct child *c, struct child *r) +{ + struct node *left = l->n; + struct node *center = c->n; + struct node *right = r->n; + + uint32_t nr_left = le32_to_cpu(left->header.nr_entries); + uint32_t nr_center = le32_to_cpu(center->header.nr_entries); + uint32_t nr_right = le32_to_cpu(right->header.nr_entries); + uint32_t max_entries = le32_to_cpu(left->header.max_entries); + + unsigned target; + + BUG_ON(left->header.max_entries != center->header.max_entries); + BUG_ON(center->header.max_entries != right->header.max_entries); + + if (((nr_left + nr_center + nr_right) / 2) < merge_threshold(center)) { + /* + * Delete center node: + * + * We dump as many entries from center as possible into + * left, then the rest in right, then rebalance2. This + * wastes some cpu, but I want something simple atm. + */ + unsigned shift = min(max_entries - nr_left, nr_center); + + BUG_ON(nr_left + shift > max_entries); + node_copy(left, center, -shift); + left->header.nr_entries = cpu_to_le32(nr_left + shift); + + if (shift != nr_center) { + shift = nr_center - shift; + BUG_ON((nr_right + shift) >= max_entries); + node_shift(right, shift); + node_copy(center, right, shift); + right->header.nr_entries = cpu_to_le32(nr_right + shift); + } + *key_ptr(parent, r->index) = right->keys[0]; + + delete_at(parent, c->index); + r->index--; + + dm_tm_dec(info->tm, dm_block_location(c->block)); + __rebalance2(info, parent, l, r); + + return; + } + + /* + * Rebalance + */ + target = (nr_left + nr_center + nr_right) / 3; + BUG_ON(target > max_entries); + + /* + * Adjust the left node + */ + shift(left, center, nr_left - target); + + /* + * Adjust the right node + */ + shift(center, right, target - nr_right); + *key_ptr(parent, c->index) = center->keys[0]; + *key_ptr(parent, r->index) = right->keys[0]; +} + +static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, + unsigned left_index) +{ + int r; + struct node *parent = dm_block_data(shadow_current(s)); + struct child left, center, right; + + /* + * FIXME: fill out an array? + */ + r = init_child(info, parent, left_index, &left); + if (r) + return r; + + r = init_child(info, parent, left_index + 1, ¢er); + if (r) { + exit_child(info, &left); + return r; + } + + r = init_child(info, parent, left_index + 2, &right); + if (r) { + exit_child(info, &left); + exit_child(info, ¢er); + return r; + } + + __rebalance3(info, parent, &left, ¢er, &right); + + r = exit_child(info, &left); + if (r) { + exit_child(info, ¢er); + exit_child(info, &right); + return r; + } + + r = exit_child(info, ¢er); + if (r) { + exit_child(info, &right); + return r; + } + + r = exit_child(info, &right); + if (r) + return r; + + return 0; +} + +static int get_nr_entries(struct dm_transaction_manager *tm, + dm_block_t b, uint32_t *result) +{ + int r; + struct dm_block *block; + struct node *n; + + r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); + if (r) + return r; + + n = dm_block_data(block); + *result = le32_to_cpu(n->header.nr_entries); + + return dm_tm_unlock(tm, block); +} + +static int rebalance_children(struct shadow_spine *s, + struct dm_btree_info *info, uint64_t key) +{ + int i, r, has_left_sibling, has_right_sibling; + uint32_t child_entries; + struct node *n; + + n = dm_block_data(shadow_current(s)); + + if (le32_to_cpu(n->header.nr_entries) == 1) { + struct dm_block *child; + dm_block_t b = value64(n, 0); + + r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child); + if (r) + return r; + + memcpy(n, dm_block_data(child), + dm_bm_block_size(dm_tm_get_bm(info->tm))); + r = dm_tm_unlock(info->tm, child); + if (r) + return r; + + dm_tm_dec(info->tm, dm_block_location(child)); + return 0; + } + + i = lower_bound(n, key); + if (i < 0) + return -ENODATA; + + r = get_nr_entries(info->tm, value64(n, i), &child_entries); + if (r) + return r; + + if (child_entries > del_threshold(n)) + return 0; + + has_left_sibling = i > 0; + has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); + + if (!has_left_sibling) + r = rebalance2(s, info, i); + + else if (!has_right_sibling) + r = rebalance2(s, info, i - 1); + + else + r = rebalance3(s, info, i - 1); + + return r; +} + +static int do_leaf(struct node *n, uint64_t key, unsigned *index) +{ + int i = lower_bound(n, key); + + if ((i < 0) || + (i >= le32_to_cpu(n->header.nr_entries)) || + (le64_to_cpu(n->keys[i]) != key)) + return -ENODATA; + + *index = i; + + return 0; +} + +/* + * Prepares for removal from one level of the hierarchy. The caller must + * call delete_at() to remove the entry at index. + */ +static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, + struct dm_btree_value_type *vt, dm_block_t root, + uint64_t key, unsigned *index) +{ + int i = *index, r; + struct node *n; + + for (;;) { + r = shadow_step(s, root, vt); + if (r < 0) + break; + + /* + * We have to patch up the parent node, ugly, but I don't + * see a way to do this automatically as part of the spine + * op. + */ + if (shadow_has_parent(s)) { + __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); + memcpy(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(__le64)), + &location, sizeof(__le64)); + } + + n = dm_block_data(shadow_current(s)); + + if (le32_to_cpu(n->header.flags) & LEAF_NODE) + return do_leaf(n, key, index); + + r = rebalance_children(s, info, key); + if (r) + break; + + n = dm_block_data(shadow_current(s)); + if (le32_to_cpu(n->header.flags) & LEAF_NODE) + return do_leaf(n, key, index); + + i = lower_bound(n, key); + + /* + * We know the key is present, or else + * rebalance_children would have returned + * -ENODATA + */ + root = value64(n, i); + } + + return r; +} + +int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, + uint64_t *keys, dm_block_t *new_root) +{ + unsigned level, last_level = info->levels - 1; + int index = 0, r = 0; + struct shadow_spine spine; + struct node *n; + + init_shadow_spine(&spine, info); + for (level = 0; level < info->levels; level++) { + r = remove_raw(&spine, info, + (level == last_level ? + &info->value_type : &le64_type), + root, keys[level], (unsigned *)&index); + if (r < 0) + break; + + n = dm_block_data(shadow_current(&spine)); + if (level != last_level) { + root = value64(n, index); + continue; + } + + BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries)); + + if (info->value_type.dec) + info->value_type.dec(info->value_type.context, + value_ptr(n, index, info->value_type.size)); + + delete_at(n, index); + } + + *new_root = shadow_root(&spine); + exit_shadow_spine(&spine); + + return r; +} +EXPORT_SYMBOL_GPL(dm_btree_remove); diff --cc drivers/md/persistent-data/dm-btree.c index e0638be53ea4,000000000000..bd1e7ffbe26c mode 100644,000000..100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@@ -1,805 -1,0 +1,805 @@@ +/* + * Copyright (C) 2011 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-btree-internal.h" +#include "dm-space-map.h" +#include "dm-transaction-manager.h" + - #include ++#include +#include + +#define DM_MSG_PREFIX "btree" + +/*---------------------------------------------------------------- + * Array manipulation + *--------------------------------------------------------------*/ +static void memcpy_disk(void *dest, const void *src, size_t len) + __dm_written_to_disk(src) +{ + memcpy(dest, src, len); + __dm_unbless_for_disk(src); +} + +static void array_insert(void *base, size_t elt_size, unsigned nr_elts, + unsigned index, void *elt) + __dm_written_to_disk(elt) +{ + if (index < nr_elts) + memmove(base + (elt_size * (index + 1)), + base + (elt_size * index), + (nr_elts - index) * elt_size); + + memcpy_disk(base + (elt_size * index), elt, elt_size); +} + +/*----------------------------------------------------------------*/ + +/* makes the assumption that no two keys are the same. */ +static int bsearch(struct node *n, uint64_t key, int want_hi) +{ + int lo = -1, hi = le32_to_cpu(n->header.nr_entries); + + while (hi - lo > 1) { + int mid = lo + ((hi - lo) / 2); + uint64_t mid_key = le64_to_cpu(n->keys[mid]); + + if (mid_key == key) + return mid; + + if (mid_key < key) + lo = mid; + else + hi = mid; + } + + return want_hi ? hi : lo; +} + +int lower_bound(struct node *n, uint64_t key) +{ + return bsearch(n, key, 0); +} + +void inc_children(struct dm_transaction_manager *tm, struct node *n, + struct dm_btree_value_type *vt) +{ + unsigned i; + uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); + + if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) + for (i = 0; i < nr_entries; i++) + dm_tm_inc(tm, value64(n, i)); + else if (vt->inc) + for (i = 0; i < nr_entries; i++) + vt->inc(vt->context, + value_ptr(n, i, vt->size)); +} + +static int insert_at(size_t value_size, struct node *node, unsigned index, + uint64_t key, void *value) + __dm_written_to_disk(value) +{ + uint32_t nr_entries = le32_to_cpu(node->header.nr_entries); + __le64 key_le = cpu_to_le64(key); + + if (index > nr_entries || + index >= le32_to_cpu(node->header.max_entries)) { + DMERR("too many entries in btree node for insert"); + __dm_unbless_for_disk(value); + return -ENOMEM; + } + + __dm_bless_for_disk(&key_le); + + array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le); + array_insert(value_base(node), value_size, nr_entries, index, value); + node->header.nr_entries = cpu_to_le32(nr_entries + 1); + + return 0; +} + +/*----------------------------------------------------------------*/ + +/* + * We want 3n entries (for some n). This works more nicely for repeated + * insert remove loops than (2n + 1). + */ +static uint32_t calc_max_entries(size_t value_size, size_t block_size) +{ + uint32_t total, n; + size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */ + + block_size -= sizeof(struct node_header); + total = block_size / elt_size; + n = total / 3; /* rounds down */ + + return 3 * n; +} + +int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) +{ + int r; + struct dm_block *b; + struct node *n; + size_t block_size; + uint32_t max_entries; + + r = new_block(info, &b); + if (r < 0) + return r; + + block_size = dm_bm_block_size(dm_tm_get_bm(info->tm)); + max_entries = calc_max_entries(info->value_type.size, block_size); + + n = dm_block_data(b); + memset(n, 0, block_size); + n->header.flags = cpu_to_le32(LEAF_NODE); + n->header.nr_entries = cpu_to_le32(0); + n->header.max_entries = cpu_to_le32(max_entries); + n->header.value_size = cpu_to_le32(info->value_type.size); + + *root = dm_block_location(b); + return unlock_block(info, b); +} +EXPORT_SYMBOL_GPL(dm_btree_empty); + +/*----------------------------------------------------------------*/ + +/* + * Deletion uses a recursive algorithm, since we have limited stack space + * we explicitly manage our own stack on the heap. + */ +#define MAX_SPINE_DEPTH 64 +struct frame { + struct dm_block *b; + struct node *n; + unsigned level; + unsigned nr_children; + unsigned current_child; +}; + +struct del_stack { + struct dm_transaction_manager *tm; + int top; + struct frame spine[MAX_SPINE_DEPTH]; +}; + +static int top_frame(struct del_stack *s, struct frame **f) +{ + if (s->top < 0) { + DMERR("btree deletion stack empty"); + return -EINVAL; + } + + *f = s->spine + s->top; + + return 0; +} + +static int unprocessed_frames(struct del_stack *s) +{ + return s->top >= 0; +} + +static int push_frame(struct del_stack *s, dm_block_t b, unsigned level) +{ + int r; + uint32_t ref_count; + + if (s->top >= MAX_SPINE_DEPTH - 1) { + DMERR("btree deletion stack out of memory"); + return -ENOMEM; + } + + r = dm_tm_ref(s->tm, b, &ref_count); + if (r) + return r; + + if (ref_count > 1) + /* + * This is a shared node, so we can just decrement it's + * reference counter and leave the children. + */ + dm_tm_dec(s->tm, b); + + else { + struct frame *f = s->spine + ++s->top; + + r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b); + if (r) { + s->top--; + return r; + } + + f->n = dm_block_data(f->b); + f->level = level; + f->nr_children = le32_to_cpu(f->n->header.nr_entries); + f->current_child = 0; + } + + return 0; +} + +static void pop_frame(struct del_stack *s) +{ + struct frame *f = s->spine + s->top--; + + dm_tm_dec(s->tm, dm_block_location(f->b)); + dm_tm_unlock(s->tm, f->b); +} + +int dm_btree_del(struct dm_btree_info *info, dm_block_t root) +{ + int r; + struct del_stack *s; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + s->tm = info->tm; + s->top = -1; + + r = push_frame(s, root, 1); + if (r) + goto out; + + while (unprocessed_frames(s)) { + uint32_t flags; + struct frame *f; + dm_block_t b; + + r = top_frame(s, &f); + if (r) + goto out; + + if (f->current_child >= f->nr_children) { + pop_frame(s); + continue; + } + + flags = le32_to_cpu(f->n->header.flags); + if (flags & INTERNAL_NODE) { + b = value64(f->n, f->current_child); + f->current_child++; + r = push_frame(s, b, f->level); + if (r) + goto out; + + } else if (f->level != (info->levels - 1)) { + b = value64(f->n, f->current_child); + f->current_child++; + r = push_frame(s, b, f->level + 1); + if (r) + goto out; + + } else { + if (info->value_type.dec) { + unsigned i; + + for (i = 0; i < f->nr_children; i++) + info->value_type.dec(info->value_type.context, + value_ptr(f->n, i, info->value_type.size)); + } + f->current_child = f->nr_children; + } + } + +out: + kfree(s); + return r; +} +EXPORT_SYMBOL_GPL(dm_btree_del); + +/*----------------------------------------------------------------*/ + +static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, + int (*search_fn)(struct node *, uint64_t), + uint64_t *result_key, void *v, size_t value_size) +{ + int i, r; + uint32_t flags, nr_entries; + + do { + r = ro_step(s, block); + if (r < 0) + return r; + + i = search_fn(ro_node(s), key); + + flags = le32_to_cpu(ro_node(s)->header.flags); + nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries); + if (i < 0 || i >= nr_entries) + return -ENODATA; + + if (flags & INTERNAL_NODE) + block = value64(ro_node(s), i); + + } while (!(flags & LEAF_NODE)); + + *result_key = le64_to_cpu(ro_node(s)->keys[i]); + memcpy(v, value_ptr(ro_node(s), i, value_size), value_size); + + return 0; +} + +int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, + uint64_t *keys, void *value_le) +{ + unsigned level, last_level = info->levels - 1; + int r = -ENODATA; + uint64_t rkey; + __le64 internal_value_le; + struct ro_spine spine; + + init_ro_spine(&spine, info); + for (level = 0; level < info->levels; level++) { + size_t size; + void *value_p; + + if (level == last_level) { + value_p = value_le; + size = info->value_type.size; + + } else { + value_p = &internal_value_le; + size = sizeof(uint64_t); + } + + r = btree_lookup_raw(&spine, root, keys[level], + lower_bound, &rkey, + value_p, size); + + if (!r) { + if (rkey != keys[level]) { + exit_ro_spine(&spine); + return -ENODATA; + } + } else { + exit_ro_spine(&spine); + return r; + } + + root = le64_to_cpu(internal_value_le); + } + exit_ro_spine(&spine); + + return r; +} +EXPORT_SYMBOL_GPL(dm_btree_lookup); + +/* + * Splits a node by creating a sibling node and shifting half the nodes + * contents across. Assumes there is a parent node, and it has room for + * another child. + * + * Before: + * +--------+ + * | Parent | + * +--------+ + * | + * v + * +----------+ + * | A ++++++ | + * +----------+ + * + * + * After: + * +--------+ + * | Parent | + * +--------+ + * | | + * v +------+ + * +---------+ | + * | A* +++ | v + * +---------+ +-------+ + * | B +++ | + * +-------+ + * + * Where A* is a shadow of A. + */ +static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, + unsigned parent_index, uint64_t key) +{ + int r; + size_t size; + unsigned nr_left, nr_right; + struct dm_block *left, *right, *parent; + struct node *ln, *rn, *pn; + __le64 location; + + left = shadow_current(s); + + r = new_block(s->info, &right); + if (r < 0) + return r; + + ln = dm_block_data(left); + rn = dm_block_data(right); + + nr_left = le32_to_cpu(ln->header.nr_entries) / 2; + nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left; + + ln->header.nr_entries = cpu_to_le32(nr_left); + + rn->header.flags = ln->header.flags; + rn->header.nr_entries = cpu_to_le32(nr_right); + rn->header.max_entries = ln->header.max_entries; + rn->header.value_size = ln->header.value_size; + memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0])); + + size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ? + sizeof(uint64_t) : s->info->value_type.size; + memcpy(value_ptr(rn, 0, size), value_ptr(ln, nr_left, size), + size * nr_right); + + /* + * Patch up the parent + */ + parent = shadow_parent(s); + + pn = dm_block_data(parent); + location = cpu_to_le64(dm_block_location(left)); + __dm_bless_for_disk(&location); + memcpy_disk(value_ptr(pn, parent_index, sizeof(__le64)), + &location, sizeof(__le64)); + + location = cpu_to_le64(dm_block_location(right)); + __dm_bless_for_disk(&location); + + r = insert_at(sizeof(__le64), pn, parent_index + 1, + le64_to_cpu(rn->keys[0]), &location); + if (r) + return r; + + if (key < le64_to_cpu(rn->keys[0])) { + unlock_block(s->info, right); + s->nodes[1] = left; + } else { + unlock_block(s->info, left); + s->nodes[1] = right; + } + + return 0; +} + +/* + * Splits a node by creating two new children beneath the given node. + * + * Before: + * +----------+ + * | A ++++++ | + * +----------+ + * + * + * After: + * +------------+ + * | A (shadow) | + * +------------+ + * | | + * +------+ +----+ + * | | + * v v + * +-------+ +-------+ + * | B +++ | | C +++ | + * +-------+ +-------+ + */ +static int btree_split_beneath(struct shadow_spine *s, uint64_t key) +{ + int r; + size_t size; + unsigned nr_left, nr_right; + struct dm_block *left, *right, *new_parent; + struct node *pn, *ln, *rn; + __le64 val; + + new_parent = shadow_current(s); + + r = new_block(s->info, &left); + if (r < 0) + return r; + + r = new_block(s->info, &right); + if (r < 0) { + /* FIXME: put left */ + return r; + } + + pn = dm_block_data(new_parent); + ln = dm_block_data(left); + rn = dm_block_data(right); + + nr_left = le32_to_cpu(pn->header.nr_entries) / 2; + nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left; + + ln->header.flags = pn->header.flags; + ln->header.nr_entries = cpu_to_le32(nr_left); + ln->header.max_entries = pn->header.max_entries; + ln->header.value_size = pn->header.value_size; + + rn->header.flags = pn->header.flags; + rn->header.nr_entries = cpu_to_le32(nr_right); + rn->header.max_entries = pn->header.max_entries; + rn->header.value_size = pn->header.value_size; + + memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); + memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0])); + + size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? + sizeof(__le64) : s->info->value_type.size; + memcpy(value_ptr(ln, 0, size), value_ptr(pn, 0, size), nr_left * size); + memcpy(value_ptr(rn, 0, size), value_ptr(pn, nr_left, size), + nr_right * size); + + /* new_parent should just point to l and r now */ + pn->header.flags = cpu_to_le32(INTERNAL_NODE); + pn->header.nr_entries = cpu_to_le32(2); + pn->header.max_entries = cpu_to_le32( + calc_max_entries(sizeof(__le64), + dm_bm_block_size( + dm_tm_get_bm(s->info->tm)))); + pn->header.value_size = cpu_to_le32(sizeof(__le64)); + + val = cpu_to_le64(dm_block_location(left)); + __dm_bless_for_disk(&val); + pn->keys[0] = ln->keys[0]; + memcpy_disk(value_ptr(pn, 0, sizeof(__le64)), &val, sizeof(__le64)); + + val = cpu_to_le64(dm_block_location(right)); + __dm_bless_for_disk(&val); + pn->keys[1] = rn->keys[0]; + memcpy_disk(value_ptr(pn, 1, sizeof(__le64)), &val, sizeof(__le64)); + + /* + * rejig the spine. This is ugly, since it knows too + * much about the spine + */ + if (s->nodes[0] != new_parent) { + unlock_block(s->info, s->nodes[0]); + s->nodes[0] = new_parent; + } + if (key < le64_to_cpu(rn->keys[0])) { + unlock_block(s->info, right); + s->nodes[1] = left; + } else { + unlock_block(s->info, left); + s->nodes[1] = right; + } + s->count = 2; + + return 0; +} + +static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, + struct dm_btree_value_type *vt, + uint64_t key, unsigned *index) +{ + int r, i = *index, top = 1; + struct node *node; + + for (;;) { + r = shadow_step(s, root, vt); + if (r < 0) + return r; + + node = dm_block_data(shadow_current(s)); + + /* + * We have to patch up the parent node, ugly, but I don't + * see a way to do this automatically as part of the spine + * op. + */ + if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */ + __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); + + __dm_bless_for_disk(&location); + memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)), + &location, sizeof(__le64)); + } + + node = dm_block_data(shadow_current(s)); + + if (node->header.nr_entries == node->header.max_entries) { + if (top) + r = btree_split_beneath(s, key); + else + r = btree_split_sibling(s, root, i, key); + + if (r < 0) + return r; + } + + node = dm_block_data(shadow_current(s)); + + i = lower_bound(node, key); + + if (le32_to_cpu(node->header.flags) & LEAF_NODE) + break; + + if (i < 0) { + /* change the bounds on the lowest key */ + node->keys[0] = cpu_to_le64(key); + i = 0; + } + + root = value64(node, i); + top = 0; + } + + if (i < 0 || le64_to_cpu(node->keys[i]) != key) + i++; + + *index = i; + return 0; +} + +static int insert(struct dm_btree_info *info, dm_block_t root, + uint64_t *keys, void *value, dm_block_t *new_root, + int *inserted) + __dm_written_to_disk(value) +{ + int r, need_insert; + unsigned level, index = -1, last_level = info->levels - 1; + dm_block_t block = root; + struct shadow_spine spine; + struct node *n; + struct dm_btree_value_type le64_type; + + le64_type.context = NULL; + le64_type.size = sizeof(__le64); + le64_type.inc = NULL; + le64_type.dec = NULL; + le64_type.equal = NULL; + + init_shadow_spine(&spine, info); + + for (level = 0; level < (info->levels - 1); level++) { + r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index); + if (r < 0) + goto bad; + + n = dm_block_data(shadow_current(&spine)); + need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) || + (le64_to_cpu(n->keys[index]) != keys[level])); + + if (need_insert) { + dm_block_t new_tree; + __le64 new_le; + + r = dm_btree_empty(info, &new_tree); + if (r < 0) + goto bad; + + new_le = cpu_to_le64(new_tree); + __dm_bless_for_disk(&new_le); + + r = insert_at(sizeof(uint64_t), n, index, + keys[level], &new_le); + if (r) + goto bad; + } + + if (level < last_level) + block = value64(n, index); + } + + r = btree_insert_raw(&spine, block, &info->value_type, + keys[level], &index); + if (r < 0) + goto bad; + + n = dm_block_data(shadow_current(&spine)); + need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) || + (le64_to_cpu(n->keys[index]) != keys[level])); + + if (need_insert) { + if (inserted) + *inserted = 1; + + r = insert_at(info->value_type.size, n, index, + keys[level], value); + if (r) + goto bad_unblessed; + } else { + if (inserted) + *inserted = 0; + + if (info->value_type.dec && + (!info->value_type.equal || + !info->value_type.equal( + info->value_type.context, + value_ptr(n, index, info->value_type.size), + value))) { + info->value_type.dec(info->value_type.context, + value_ptr(n, index, info->value_type.size)); + } + memcpy_disk(value_ptr(n, index, info->value_type.size), + value, info->value_type.size); + } + + *new_root = shadow_root(&spine); + exit_shadow_spine(&spine); + + return 0; + +bad: + __dm_unbless_for_disk(value); +bad_unblessed: + exit_shadow_spine(&spine); + return r; +} + +int dm_btree_insert(struct dm_btree_info *info, dm_block_t root, + uint64_t *keys, void *value, dm_block_t *new_root) + __dm_written_to_disk(value) +{ + return insert(info, root, keys, value, new_root, NULL); +} +EXPORT_SYMBOL_GPL(dm_btree_insert); + +int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root, + uint64_t *keys, void *value, dm_block_t *new_root, + int *inserted) + __dm_written_to_disk(value) +{ + return insert(info, root, keys, value, new_root, inserted); +} +EXPORT_SYMBOL_GPL(dm_btree_insert_notify); + +/*----------------------------------------------------------------*/ + +static int find_highest_key(struct ro_spine *s, dm_block_t block, + uint64_t *result_key, dm_block_t *next_block) +{ + int i, r; + uint32_t flags; + + do { + r = ro_step(s, block); + if (r < 0) + return r; + + flags = le32_to_cpu(ro_node(s)->header.flags); + i = le32_to_cpu(ro_node(s)->header.nr_entries); + if (!i) + return -ENODATA; + else + i--; + + *result_key = le64_to_cpu(ro_node(s)->keys[i]); + if (next_block || flags & INTERNAL_NODE) + block = value64(ro_node(s), i); + + } while (flags & INTERNAL_NODE); + + if (next_block) + *next_block = block; + return 0; +} + +int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, + uint64_t *result_keys) +{ + int r = 0, count = 0, level; + struct ro_spine spine; + + init_ro_spine(&spine, info); + for (level = 0; level < info->levels; level++) { + r = find_highest_key(&spine, root, result_keys + level, + level == info->levels - 1 ? NULL : &root); + if (r == -ENODATA) { + r = 0; + break; + + } else if (r) + break; + + count++; + } + exit_ro_spine(&spine); + + return r ? r : count; +} +EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); diff --cc drivers/md/persistent-data/dm-space-map-checker.c index bb44a937fe63,000000000000..50ed53bf4aa2 mode 100644,000000..100644 --- a/drivers/md/persistent-data/dm-space-map-checker.c +++ b/drivers/md/persistent-data/dm-space-map-checker.c @@@ -1,437 -1,0 +1,438 @@@ +/* + * Copyright (C) 2011 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-space-map-checker.h" + +#include ++#include + +#ifdef CONFIG_DM_DEBUG_SPACE_MAPS + +#define DM_MSG_PREFIX "space map checker" + +/*----------------------------------------------------------------*/ + +struct count_array { + dm_block_t nr; + dm_block_t nr_free; + + uint32_t *counts; +}; + +static int ca_get_count(struct count_array *ca, dm_block_t b, uint32_t *count) +{ + if (b >= ca->nr) + return -EINVAL; + + *count = ca->counts[b]; + return 0; +} + +static int ca_count_more_than_one(struct count_array *ca, dm_block_t b, int *r) +{ + if (b >= ca->nr) + return -EINVAL; + + *r = ca->counts[b] > 1; + return 0; +} + +static int ca_set_count(struct count_array *ca, dm_block_t b, uint32_t count) +{ + uint32_t old_count; + + if (b >= ca->nr) + return -EINVAL; + + old_count = ca->counts[b]; + + if (!count && old_count) + ca->nr_free++; + + else if (count && !old_count) + ca->nr_free--; + + ca->counts[b] = count; + return 0; +} + +static int ca_inc_block(struct count_array *ca, dm_block_t b) +{ + if (b >= ca->nr) + return -EINVAL; + + ca_set_count(ca, b, ca->counts[b] + 1); + return 0; +} + +static int ca_dec_block(struct count_array *ca, dm_block_t b) +{ + if (b >= ca->nr) + return -EINVAL; + + BUG_ON(ca->counts[b] == 0); + ca_set_count(ca, b, ca->counts[b] - 1); + return 0; +} + +static int ca_create(struct count_array *ca, struct dm_space_map *sm) +{ + int r; + dm_block_t nr_blocks; + + r = dm_sm_get_nr_blocks(sm, &nr_blocks); + if (r) + return r; + + ca->nr = nr_blocks; + ca->nr_free = nr_blocks; + ca->counts = kzalloc(sizeof(*ca->counts) * nr_blocks, GFP_KERNEL); + if (!ca->counts) + return -ENOMEM; + + return 0; +} + +static int ca_load(struct count_array *ca, struct dm_space_map *sm) +{ + int r; + uint32_t count; + dm_block_t nr_blocks, i; + + r = dm_sm_get_nr_blocks(sm, &nr_blocks); + if (r) + return r; + + BUG_ON(ca->nr != nr_blocks); + + DMWARN("Loading debug space map from disk. This may take some time"); + for (i = 0; i < nr_blocks; i++) { + r = dm_sm_get_count(sm, i, &count); + if (r) { + DMERR("load failed"); + return r; + } + + ca_set_count(ca, i, count); + } + DMWARN("Load complete"); + + return 0; +} + +static int ca_extend(struct count_array *ca, dm_block_t extra_blocks) +{ + dm_block_t nr_blocks = ca->nr + extra_blocks; + uint32_t *counts = kzalloc(sizeof(*counts) * nr_blocks, GFP_KERNEL); + if (!counts) + return -ENOMEM; + + memcpy(counts, ca->counts, sizeof(*counts) * ca->nr); + kfree(ca->counts); + ca->nr = nr_blocks; + ca->nr_free += extra_blocks; + ca->counts = counts; + return 0; +} + +static int ca_commit(struct count_array *old, struct count_array *new) +{ + if (old->nr != new->nr) { + BUG_ON(old->nr > new->nr); + ca_extend(old, new->nr - old->nr); + } + + BUG_ON(old->nr != new->nr); + old->nr_free = new->nr_free; + memcpy(old->counts, new->counts, sizeof(*old->counts) * old->nr); + return 0; +} + +static void ca_destroy(struct count_array *ca) +{ + kfree(ca->counts); +} + +/*----------------------------------------------------------------*/ + +struct sm_checker { + struct dm_space_map sm; + + struct count_array old_counts; + struct count_array counts; + + struct dm_space_map *real_sm; +}; + +static void sm_checker_destroy(struct dm_space_map *sm) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + + dm_sm_destroy(smc->real_sm); + ca_destroy(&smc->old_counts); + ca_destroy(&smc->counts); + kfree(smc); +} + +static int sm_checker_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + int r = dm_sm_get_nr_blocks(smc->real_sm, count); + if (!r) + BUG_ON(smc->old_counts.nr != *count); + return r; +} + +static int sm_checker_get_nr_free(struct dm_space_map *sm, dm_block_t *count) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + int r = dm_sm_get_nr_free(smc->real_sm, count); + if (!r) { + /* + * Slow, but we know it's correct. + */ + dm_block_t b, n = 0; + for (b = 0; b < smc->old_counts.nr; b++) + if (smc->old_counts.counts[b] == 0 && + smc->counts.counts[b] == 0) + n++; + + if (n != *count) + DMERR("free block counts differ, checker %u, sm-disk:%u", + (unsigned) n, (unsigned) *count); + } + return r; +} + +static int sm_checker_new_block(struct dm_space_map *sm, dm_block_t *b) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + int r = dm_sm_new_block(smc->real_sm, b); + + if (!r) { + BUG_ON(*b >= smc->old_counts.nr); + BUG_ON(smc->old_counts.counts[*b] != 0); + BUG_ON(*b >= smc->counts.nr); + BUG_ON(smc->counts.counts[*b] != 0); + ca_set_count(&smc->counts, *b, 1); + } + + return r; +} + +static int sm_checker_inc_block(struct dm_space_map *sm, dm_block_t b) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + int r = dm_sm_inc_block(smc->real_sm, b); + int r2 = ca_inc_block(&smc->counts, b); + BUG_ON(r != r2); + return r; +} + +static int sm_checker_dec_block(struct dm_space_map *sm, dm_block_t b) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + int r = dm_sm_dec_block(smc->real_sm, b); + int r2 = ca_dec_block(&smc->counts, b); + BUG_ON(r != r2); + return r; +} + +static int sm_checker_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + uint32_t result2 = 0; + int r = dm_sm_get_count(smc->real_sm, b, result); + int r2 = ca_get_count(&smc->counts, b, &result2); + + BUG_ON(r != r2); + if (!r) + BUG_ON(*result != result2); + return r; +} + +static int sm_checker_count_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + int result2 = 0; + int r = dm_sm_count_is_more_than_one(smc->real_sm, b, result); + int r2 = ca_count_more_than_one(&smc->counts, b, &result2); + + BUG_ON(r != r2); + if (!r) + BUG_ON(!(*result) && result2); + return r; +} + +static int sm_checker_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + uint32_t old_rc; + int r = dm_sm_set_count(smc->real_sm, b, count); + int r2; + + BUG_ON(b >= smc->counts.nr); + old_rc = smc->counts.counts[b]; + r2 = ca_set_count(&smc->counts, b, count); + BUG_ON(r != r2); + + return r; +} + +static int sm_checker_commit(struct dm_space_map *sm) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + int r; + + r = dm_sm_commit(smc->real_sm); + if (r) + return r; + + r = ca_commit(&smc->old_counts, &smc->counts); + if (r) + return r; + + return 0; +} + +static int sm_checker_extend(struct dm_space_map *sm, dm_block_t extra_blocks) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + int r = dm_sm_extend(smc->real_sm, extra_blocks); + if (r) + return r; + + return ca_extend(&smc->counts, extra_blocks); +} + +static int sm_checker_root_size(struct dm_space_map *sm, size_t *result) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + return dm_sm_root_size(smc->real_sm, result); +} + +static int sm_checker_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len) +{ + struct sm_checker *smc = container_of(sm, struct sm_checker, sm); + return dm_sm_copy_root(smc->real_sm, copy_to_here_le, len); +} + +/*----------------------------------------------------------------*/ + +static struct dm_space_map ops_ = { + .destroy = sm_checker_destroy, + .get_nr_blocks = sm_checker_get_nr_blocks, + .get_nr_free = sm_checker_get_nr_free, + .inc_block = sm_checker_inc_block, + .dec_block = sm_checker_dec_block, + .new_block = sm_checker_new_block, + .get_count = sm_checker_get_count, + .count_is_more_than_one = sm_checker_count_more_than_one, + .set_count = sm_checker_set_count, + .commit = sm_checker_commit, + .extend = sm_checker_extend, + .root_size = sm_checker_root_size, + .copy_root = sm_checker_copy_root +}; + +struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) +{ + int r; + struct sm_checker *smc; + + if (!sm) + return NULL; + + smc = kmalloc(sizeof(*smc), GFP_KERNEL); + if (!smc) + return NULL; + + memcpy(&smc->sm, &ops_, sizeof(smc->sm)); + r = ca_create(&smc->old_counts, sm); + if (r) { + kfree(smc); + return NULL; + } + + r = ca_create(&smc->counts, sm); + if (r) { + ca_destroy(&smc->old_counts); + kfree(smc); + return NULL; + } + + smc->real_sm = sm; + + r = ca_load(&smc->counts, sm); + if (r) { + ca_destroy(&smc->counts); + ca_destroy(&smc->old_counts); + kfree(smc); + return NULL; + } + + r = ca_commit(&smc->old_counts, &smc->counts); + if (r) { + ca_destroy(&smc->counts); + ca_destroy(&smc->old_counts); + kfree(smc); + return NULL; + } + + return &smc->sm; +} +EXPORT_SYMBOL_GPL(dm_sm_checker_create); + +struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) +{ + int r; + struct sm_checker *smc; + + if (!sm) + return NULL; + + smc = kmalloc(sizeof(*smc), GFP_KERNEL); + if (!smc) + return NULL; + + memcpy(&smc->sm, &ops_, sizeof(smc->sm)); + r = ca_create(&smc->old_counts, sm); + if (r) { + kfree(smc); + return NULL; + } + + r = ca_create(&smc->counts, sm); + if (r) { + ca_destroy(&smc->old_counts); + kfree(smc); + return NULL; + } + + smc->real_sm = sm; + return &smc->sm; +} +EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); + +/*----------------------------------------------------------------*/ + +#else + +struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) +{ + return sm; +} +EXPORT_SYMBOL_GPL(dm_sm_checker_create); + +struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) +{ + return sm; +} +EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); + +/*----------------------------------------------------------------*/ + +#endif diff --cc drivers/md/persistent-data/dm-space-map-disk.c index aeff7852cf79,000000000000..fc469ba9f627 mode 100644,000000..100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@@ -1,335 -1,0 +1,335 @@@ +/* + * Copyright (C) 2011 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-space-map-checker.h" +#include "dm-space-map-common.h" +#include "dm-space-map-disk.h" +#include "dm-space-map.h" +#include "dm-transaction-manager.h" + +#include +#include - #include ++#include +#include + +#define DM_MSG_PREFIX "space map disk" + +/*----------------------------------------------------------------*/ + +/* + * Space map interface. + */ +struct sm_disk { + struct dm_space_map sm; + + struct ll_disk ll; + struct ll_disk old_ll; + + dm_block_t begin; + dm_block_t nr_allocated_this_transaction; +}; + +static void sm_disk_destroy(struct dm_space_map *sm) +{ + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + + kfree(smd); +} + +static int sm_disk_extend(struct dm_space_map *sm, dm_block_t extra_blocks) +{ + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + + return sm_ll_extend(&smd->ll, extra_blocks); +} + +static int sm_disk_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) +{ + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + *count = smd->old_ll.nr_blocks; + + return 0; +} + +static int sm_disk_get_nr_free(struct dm_space_map *sm, dm_block_t *count) +{ + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + *count = (smd->old_ll.nr_blocks - smd->old_ll.nr_allocated) - smd->nr_allocated_this_transaction; + + return 0; +} + +static int sm_disk_get_count(struct dm_space_map *sm, dm_block_t b, + uint32_t *result) +{ + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + return sm_ll_lookup(&smd->ll, b, result); +} + +static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b, + int *result) +{ + int r; + uint32_t count; + + r = sm_disk_get_count(sm, b, &count); + if (r) + return r; + + return count > 1; +} + +static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b, + uint32_t count) +{ + int r; + uint32_t old_count; + enum allocation_event ev; + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + + r = sm_ll_insert(&smd->ll, b, count, &ev); + if (!r) { + switch (ev) { + case SM_NONE: + break; + + case SM_ALLOC: + /* + * This _must_ be free in the prior transaction + * otherwise we've lost atomicity. + */ + smd->nr_allocated_this_transaction++; + break; + + case SM_FREE: + /* + * It's only free if it's also free in the last + * transaction. + */ + r = sm_ll_lookup(&smd->old_ll, b, &old_count); + if (r) + return r; + + if (!old_count) + smd->nr_allocated_this_transaction--; + break; + } + } + + return r; +} + +static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) +{ + int r; + enum allocation_event ev; + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + + r = sm_ll_inc(&smd->ll, b, &ev); + if (!r && (ev == SM_ALLOC)) + /* + * This _must_ be free in the prior transaction + * otherwise we've lost atomicity. + */ + smd->nr_allocated_this_transaction++; + + return r; +} + +static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) +{ + int r; + uint32_t old_count; + enum allocation_event ev; + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + + r = sm_ll_dec(&smd->ll, b, &ev); + if (!r && (ev == SM_FREE)) { + /* + * It's only free if it's also free in the last + * transaction. + */ + r = sm_ll_lookup(&smd->old_ll, b, &old_count); + if (r) + return r; + + if (!old_count) + smd->nr_allocated_this_transaction--; + } + + return r; +} + +static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) +{ + int r; + enum allocation_event ev; + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + + /* FIXME: we should loop round a couple of times */ + r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b); + if (r) + return r; + + smd->begin = *b + 1; + r = sm_ll_inc(&smd->ll, *b, &ev); + if (!r) { + BUG_ON(ev != SM_ALLOC); + smd->nr_allocated_this_transaction++; + } + + return r; +} + +static int sm_disk_commit(struct dm_space_map *sm) +{ + int r; + dm_block_t nr_free; + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + + r = sm_disk_get_nr_free(sm, &nr_free); + if (r) + return r; + + r = sm_ll_commit(&smd->ll); + if (r) + return r; + + memcpy(&smd->old_ll, &smd->ll, sizeof(smd->old_ll)); + smd->begin = 0; + smd->nr_allocated_this_transaction = 0; + + r = sm_disk_get_nr_free(sm, &nr_free); + if (r) + return r; + + return 0; +} + +static int sm_disk_root_size(struct dm_space_map *sm, size_t *result) +{ + *result = sizeof(struct disk_sm_root); + + return 0; +} + +static int sm_disk_copy_root(struct dm_space_map *sm, void *where_le, size_t max) +{ + struct sm_disk *smd = container_of(sm, struct sm_disk, sm); + struct disk_sm_root root_le; + + root_le.nr_blocks = cpu_to_le64(smd->ll.nr_blocks); + root_le.nr_allocated = cpu_to_le64(smd->ll.nr_allocated); + root_le.bitmap_root = cpu_to_le64(smd->ll.bitmap_root); + root_le.ref_count_root = cpu_to_le64(smd->ll.ref_count_root); + + if (max < sizeof(root_le)) + return -ENOSPC; + + memcpy(where_le, &root_le, sizeof(root_le)); + + return 0; +} + +/*----------------------------------------------------------------*/ + +static struct dm_space_map ops = { + .destroy = sm_disk_destroy, + .extend = sm_disk_extend, + .get_nr_blocks = sm_disk_get_nr_blocks, + .get_nr_free = sm_disk_get_nr_free, + .get_count = sm_disk_get_count, + .count_is_more_than_one = sm_disk_count_is_more_than_one, + .set_count = sm_disk_set_count, + .inc_block = sm_disk_inc_block, + .dec_block = sm_disk_dec_block, + .new_block = sm_disk_new_block, + .commit = sm_disk_commit, + .root_size = sm_disk_root_size, + .copy_root = sm_disk_copy_root +}; + +static struct dm_space_map *dm_sm_disk_create_real( + struct dm_transaction_manager *tm, + dm_block_t nr_blocks) +{ + int r; + struct sm_disk *smd; + + smd = kmalloc(sizeof(*smd), GFP_KERNEL); + if (!smd) + return ERR_PTR(-ENOMEM); + + smd->begin = 0; + smd->nr_allocated_this_transaction = 0; + memcpy(&smd->sm, &ops, sizeof(smd->sm)); + + r = sm_ll_new_disk(&smd->ll, tm); + if (r) + goto bad; + + r = sm_ll_extend(&smd->ll, nr_blocks); + if (r) + goto bad; + + r = sm_disk_commit(&smd->sm); + if (r) + goto bad; + + return &smd->sm; + +bad: + kfree(smd); + return ERR_PTR(r); +} + +struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, + dm_block_t nr_blocks) +{ + struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks); + return dm_sm_checker_create_fresh(sm); +} +EXPORT_SYMBOL_GPL(dm_sm_disk_create); + +static struct dm_space_map *dm_sm_disk_open_real( + struct dm_transaction_manager *tm, + void *root_le, size_t len) +{ + int r; + struct sm_disk *smd; + + smd = kmalloc(sizeof(*smd), GFP_KERNEL); + if (!smd) + return ERR_PTR(-ENOMEM); + + smd->begin = 0; + smd->nr_allocated_this_transaction = 0; + memcpy(&smd->sm, &ops, sizeof(smd->sm)); + + r = sm_ll_open_disk(&smd->ll, tm, root_le, len); + if (r) + goto bad; + + r = sm_disk_commit(&smd->sm); + if (r) + goto bad; + + return &smd->sm; + +bad: + kfree(smd); + return ERR_PTR(r); +} + +struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm, + void *root_le, size_t len) +{ + return dm_sm_checker_create( + dm_sm_disk_open_real(tm, root_le, len)); +} +EXPORT_SYMBOL_GPL(dm_sm_disk_open); + +/*----------------------------------------------------------------*/ diff --cc drivers/md/persistent-data/dm-transaction-manager.c index 728e89a3f978,000000000000..6f8d38747d7f mode 100644,000000..100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@@ -1,400 -1,0 +1,400 @@@ +/* + * Copyright (C) 2011 Red Hat, Inc. + * + * This file is released under the GPL. + */ +#include "dm-transaction-manager.h" +#include "dm-space-map.h" +#include "dm-space-map-checker.h" +#include "dm-space-map-disk.h" +#include "dm-space-map-metadata.h" +#include "dm-persistent-data-internal.h" + - #include ++#include +#include +#include + +#define DM_MSG_PREFIX "transaction manager" + +/*----------------------------------------------------------------*/ + +struct shadow_info { + struct hlist_node hlist; + dm_block_t where; +}; + +/* + * It would be nice if we scaled with the size of transaction. + */ +#define HASH_SIZE 256 +#define HASH_MASK (HASH_SIZE - 1) + +struct dm_transaction_manager { + int is_clone; + struct dm_transaction_manager *real; + + struct dm_block_manager *bm; + struct dm_space_map *sm; + + spinlock_t lock; + struct hlist_head buckets[HASH_SIZE]; +}; + +/*----------------------------------------------------------------*/ + +static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b) +{ + int r = 0; + unsigned bucket = dm_hash_block(b, HASH_MASK); + struct shadow_info *si; + struct hlist_node *n; + + spin_lock(&tm->lock); + hlist_for_each_entry(si, n, tm->buckets + bucket, hlist) + if (si->where == b) { + r = 1; + break; + } + spin_unlock(&tm->lock); + + return r; +} + +/* + * This can silently fail if there's no memory. We're ok with this since + * creating redundant shadows causes no harm. + */ +static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b) +{ + unsigned bucket; + struct shadow_info *si; + + si = kmalloc(sizeof(*si), GFP_NOIO); + if (si) { + si->where = b; + bucket = dm_hash_block(b, HASH_MASK); + spin_lock(&tm->lock); + hlist_add_head(&si->hlist, tm->buckets + bucket); + spin_unlock(&tm->lock); + } +} + +static void wipe_shadow_table(struct dm_transaction_manager *tm) +{ + struct shadow_info *si; + struct hlist_node *n, *tmp; + struct hlist_head *bucket; + int i; + + spin_lock(&tm->lock); + for (i = 0; i < HASH_SIZE; i++) { + bucket = tm->buckets + i; + hlist_for_each_entry_safe(si, n, tmp, bucket, hlist) + kfree(si); + + INIT_HLIST_HEAD(bucket); + } + + spin_unlock(&tm->lock); +} + +/*----------------------------------------------------------------*/ + +static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm, + struct dm_space_map *sm) +{ + int i; + struct dm_transaction_manager *tm; + + tm = kmalloc(sizeof(*tm), GFP_KERNEL); + if (!tm) + return ERR_PTR(-ENOMEM); + + tm->is_clone = 0; + tm->real = NULL; + tm->bm = bm; + tm->sm = sm; + + spin_lock_init(&tm->lock); + for (i = 0; i < HASH_SIZE; i++) + INIT_HLIST_HEAD(tm->buckets + i); + + return tm; +} + +struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transaction_manager *real) +{ + struct dm_transaction_manager *tm; + + tm = kmalloc(sizeof(*tm), GFP_KERNEL); + if (tm) { + tm->is_clone = 1; + tm->real = real; + } + + return tm; +} +EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone); + +void dm_tm_destroy(struct dm_transaction_manager *tm) +{ + kfree(tm); +} +EXPORT_SYMBOL_GPL(dm_tm_destroy); + +int dm_tm_pre_commit(struct dm_transaction_manager *tm) +{ + int r; + + if (tm->is_clone) + return -EWOULDBLOCK; + + r = dm_sm_commit(tm->sm); + if (r < 0) + return r; + + return 0; +} +EXPORT_SYMBOL_GPL(dm_tm_pre_commit); + +int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root) +{ + if (tm->is_clone) + return -EWOULDBLOCK; + + wipe_shadow_table(tm); + + return dm_bm_flush_and_unlock(tm->bm, root); +} +EXPORT_SYMBOL_GPL(dm_tm_commit); + +int dm_tm_new_block(struct dm_transaction_manager *tm, + struct dm_block_validator *v, + struct dm_block **result) +{ + int r; + dm_block_t new_block; + + if (tm->is_clone) + return -EWOULDBLOCK; + + r = dm_sm_new_block(tm->sm, &new_block); + if (r < 0) + return r; + + r = dm_bm_write_lock_zero(tm->bm, new_block, v, result); + if (r < 0) { + dm_sm_dec_block(tm->sm, new_block); + return r; + } + + /* + * New blocks count as shadows in that they don't need to be + * shadowed again. + */ + insert_shadow(tm, new_block); + + return 0; +} + +static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, + struct dm_block_validator *v, + struct dm_block **result) +{ + int r; + dm_block_t new; + struct dm_block *orig_block; + + r = dm_sm_new_block(tm->sm, &new); + if (r < 0) + return r; + + r = dm_sm_dec_block(tm->sm, orig); + if (r < 0) + return r; + + r = dm_bm_read_lock(tm->bm, orig, v, &orig_block); + if (r < 0) + return r; + + r = dm_bm_unlock_move(orig_block, new); + if (r < 0) { + dm_bm_unlock(orig_block); + return r; + } + + return dm_bm_write_lock(tm->bm, new, v, result); +} + +int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, + struct dm_block_validator *v, struct dm_block **result, + int *inc_children) +{ + int r; + + if (tm->is_clone) + return -EWOULDBLOCK; + + r = dm_sm_count_is_more_than_one(tm->sm, orig, inc_children); + if (r < 0) + return r; + + if (is_shadow(tm, orig) && !*inc_children) + return dm_bm_write_lock(tm->bm, orig, v, result); + + r = __shadow_block(tm, orig, v, result); + if (r < 0) + return r; + insert_shadow(tm, dm_block_location(*result)); + + return r; +} + +int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b, + struct dm_block_validator *v, + struct dm_block **blk) +{ + if (tm->is_clone) + return dm_bm_read_try_lock(tm->real->bm, b, v, blk); + + return dm_bm_read_lock(tm->bm, b, v, blk); +} + +int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b) +{ + return dm_bm_unlock(b); +} +EXPORT_SYMBOL_GPL(dm_tm_unlock); + +void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b) +{ + /* + * The non-blocking clone doesn't support this. + */ + BUG_ON(tm->is_clone); + + dm_sm_inc_block(tm->sm, b); +} +EXPORT_SYMBOL_GPL(dm_tm_inc); + +void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b) +{ + /* + * The non-blocking clone doesn't support this. + */ + BUG_ON(tm->is_clone); + + dm_sm_dec_block(tm->sm, b); +} +EXPORT_SYMBOL_GPL(dm_tm_dec); + +int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, + uint32_t *result) +{ + if (tm->is_clone) + return -EWOULDBLOCK; + + return dm_sm_get_count(tm->sm, b, result); +} + +struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm) +{ + return tm->bm; +} + +/*----------------------------------------------------------------*/ + +static int dm_tm_create_internal(struct dm_block_manager *bm, + dm_block_t sb_location, + struct dm_block_validator *sb_validator, + size_t root_offset, size_t root_max_len, + struct dm_transaction_manager **tm, + struct dm_space_map **sm, + struct dm_block **sblock, + int create) +{ + int r; + struct dm_space_map *inner; + + inner = dm_sm_metadata_init(); + if (IS_ERR(inner)) + return PTR_ERR(inner); + + *tm = dm_tm_create(bm, inner); + if (IS_ERR(*tm)) { + dm_sm_destroy(inner); + return PTR_ERR(*tm); + } + + if (create) { + r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location, + sb_validator, sblock); + if (r < 0) { + DMERR("couldn't lock superblock"); + goto bad1; + } + + r = dm_sm_metadata_create(inner, *tm, dm_bm_nr_blocks(bm), + sb_location); + if (r) { + DMERR("couldn't create metadata space map"); + goto bad2; + } + + *sm = dm_sm_checker_create(inner); + if (!*sm) + goto bad2; + + } else { + r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location, + sb_validator, sblock); + if (r < 0) { + DMERR("couldn't lock superblock"); + goto bad1; + } + + r = dm_sm_metadata_open(inner, *tm, + dm_block_data(*sblock) + root_offset, + root_max_len); + if (r) { + DMERR("couldn't open metadata space map"); + goto bad2; + } + + *sm = dm_sm_checker_create(inner); + if (!*sm) + goto bad2; + } + + return 0; + +bad2: + dm_tm_unlock(*tm, *sblock); +bad1: + dm_tm_destroy(*tm); + dm_sm_destroy(inner); + return r; +} + +int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, + struct dm_block_validator *sb_validator, + struct dm_transaction_manager **tm, + struct dm_space_map **sm, struct dm_block **sblock) +{ + return dm_tm_create_internal(bm, sb_location, sb_validator, + 0, 0, tm, sm, sblock, 1); +} +EXPORT_SYMBOL_GPL(dm_tm_create_with_sm); + +int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, + struct dm_block_validator *sb_validator, + size_t root_offset, size_t root_max_len, + struct dm_transaction_manager **tm, + struct dm_space_map **sm, struct dm_block **sblock) +{ + return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset, + root_max_len, tm, sm, sblock, 0); +} +EXPORT_SYMBOL_GPL(dm_tm_open_with_sm); + +/*----------------------------------------------------------------*/ diff --cc drivers/media/common/saa7146_core.c index f5d53a202344,31e53b6a881a..d6b1cf66042d --- a/drivers/media/common/saa7146_core.c +++ b/drivers/media/common/saa7146_core.c @@@ -18,9 -18,8 +18,10 @@@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include + #include LIST_HEAD(saa7146_devices); DEFINE_MUTEX(saa7146_devices_lock); diff --cc drivers/media/common/saa7146_fops.c index a92546144eaa,e4547afcfa88..71f8e018e564 --- a/drivers/media/common/saa7146_fops.c +++ b/drivers/media/common/saa7146_fops.c @@@ -1,6 -1,5 +1,7 @@@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include + #include /****************************************************************************/ /* resource management functions, shamelessly stolen from saa7134 driver */ diff --cc drivers/media/common/saa7146_hlp.c index 79ad73accb27,c9c6e9a6c31d..bc1f545c95cb --- a/drivers/media/common/saa7146_hlp.c +++ b/drivers/media/common/saa7146_hlp.c @@@ -1,6 -1,5 +1,7 @@@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include + #include #include static void calculate_output_format_register(struct saa7146_dev* saa, u32 palette, u32* clip_format) diff --cc drivers/media/common/saa7146_video.c index 384b358d3037,3a00253fe1ee..ce30533fd972 --- a/drivers/media/common/saa7146_video.c +++ b/drivers/media/common/saa7146_video.c @@@ -1,7 -1,6 +1,8 @@@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include + #include static int max_memory = 32; diff --cc drivers/media/dvb/frontends/dibx000_common.c index 774d507b66cc,977211fec137..43be7238311e --- a/drivers/media/dvb/frontends/dibx000_common.c +++ b/drivers/media/dvb/frontends/dibx000_common.c @@@ -1,5 -1,5 +1,6 @@@ #include +#include + #include #include "dibx000_common.h" diff --cc drivers/media/video/adp1653.c index 5914390211ff,c2594948ca3f..12eedf4d515a --- a/drivers/media/video/adp1653.c +++ b/drivers/media/video/adp1653.c @@@ -31,8 -31,8 +31,9 @@@ */ #include + #include #include +#include #include #include #include diff --cc drivers/media/video/imx074.c index 8775e262bb6e,3319b40c87a4..eec75bb57203 --- a/drivers/media/video/imx074.c +++ b/drivers/media/video/imx074.c @@@ -12,11 -12,12 +12,12 @@@ #include #include +#include #include #include + #include #include -#include #include #include diff --cc drivers/media/video/mt9m001.c index 63ae5c61c9bf,6866a9ef3f60..e2b1029b16cd --- a/drivers/media/video/mt9m001.c +++ b/drivers/media/video/mt9m001.c @@@ -12,12 -12,11 +12,13 @@@ #include #include #include + #include +#include +#include #include #include -#include +#include /* * mt9m001 i2c address 0x5d diff --cc drivers/media/video/mt9m111.c index f023cc092c2b,66e3c3c2e606..cf2c0fb95f2f --- a/drivers/media/video/mt9m111.c +++ b/drivers/media/video/mt9m111.c @@@ -13,12 -13,11 +13,13 @@@ #include #include #include +#include + #include +#include #include +#include #include -#include /* * MT9M111, MT9M112 and MT9M131: diff --cc drivers/media/video/mt9t031.c index 7ee84cc578b9,e6e0238eca16..0e78477452ff --- a/drivers/media/video/mt9t031.c +++ b/drivers/media/video/mt9t031.c @@@ -13,8 -13,8 +13,9 @@@ #include #include #include +#include #include + #include #include #include diff --cc drivers/media/video/mt9v022.c index b6a29f7de82c,c74d6604598e..690ee0d42eeb --- a/drivers/media/video/mt9v022.c +++ b/drivers/media/video/mt9v022.c @@@ -13,12 -13,11 +13,13 @@@ #include #include #include + #include +#include +#include #include #include -#include +#include /* * mt9v022 i2c address 0x48, 0x4c, 0x58, 0x5c diff --cc drivers/media/video/ov6650.c index d5b057207a7b,2e1680631f0f..9f2d26b1d4cb --- a/drivers/media/video/ov6650.c +++ b/drivers/media/video/ov6650.c @@@ -28,7 -28,7 +28,8 @@@ #include #include #include +#include + #include #include #include diff --cc drivers/media/video/rj54n1cb0c.c index 6afc61689549,985965f744ff..9937386a3bae --- a/drivers/media/video/rj54n1cb0c.c +++ b/drivers/media/video/rj54n1cb0c.c @@@ -11,8 -11,8 +11,9 @@@ #include #include #include +#include #include + #include #include #include diff --cc drivers/media/video/v4l2-device.c index 9fc0ae8a526a,c742b1f5e73e..0edd618b9ddf --- a/drivers/media/video/v4l2-device.c +++ b/drivers/media/video/v4l2-device.c @@@ -20,8 -20,8 +20,9 @@@ #include #include + #include #include +#include #if defined(CONFIG_SPI) #include #endif diff --cc drivers/mfd/max8997.c index dc58750bb71b,50ad93bb49dd..5be53ae9b61c --- a/drivers/mfd/max8997.c +++ b/drivers/mfd/max8997.c @@@ -23,8 -23,8 +23,9 @@@ #include #include +#include #include + #include #include #include #include diff --cc drivers/s390/char/vmur.c index d291a54acfad,b95cbdccc11a..85f4a9a5d12e --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@@ -11,8 -11,10 +11,9 @@@ #define KMSG_COMPONENT "vmur" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include #include #include + #include #include #include diff --cc drivers/tty/hvc/hvc_opal.c index 7b38512d6c41,000000000000..ced26c8ccd57 mode 100644,000000..100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@@ -1,424 -1,0 +1,425 @@@ +/* + * opal driver interface to hvc_console.c + * + * Copyright 2011 Benjamin Herrenschmidt , IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include ++#include + +#include +#include +#include +#include +#include +#include + +#include "hvc_console.h" + +static const char hvc_opal_name[] = "hvc_opal"; + +static struct of_device_id hvc_opal_match[] __devinitdata = { + { .name = "serial", .compatible = "ibm,opal-console-raw" }, + { .name = "serial", .compatible = "ibm,opal-console-hvsi" }, + { }, +}; + +typedef enum hv_protocol { + HV_PROTOCOL_RAW, + HV_PROTOCOL_HVSI +} hv_protocol_t; + +struct hvc_opal_priv { + hv_protocol_t proto; /* Raw data or HVSI packets */ + struct hvsi_priv hvsi; /* HVSI specific data */ +}; +static struct hvc_opal_priv *hvc_opal_privs[MAX_NR_HVC_CONSOLES]; + +/* For early boot console */ +static struct hvc_opal_priv hvc_opal_boot_priv; +static u32 hvc_opal_boot_termno; + +static const struct hv_ops hvc_opal_raw_ops = { + .get_chars = opal_get_chars, + .put_chars = opal_put_chars, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, + .notifier_hangup = notifier_hangup_irq, +}; + +static int hvc_opal_hvsi_get_chars(uint32_t vtermno, char *buf, int count) +{ + struct hvc_opal_priv *pv = hvc_opal_privs[vtermno]; + + if (WARN_ON(!pv)) + return -ENODEV; + + return hvsilib_get_chars(&pv->hvsi, buf, count); +} + +static int hvc_opal_hvsi_put_chars(uint32_t vtermno, const char *buf, int count) +{ + struct hvc_opal_priv *pv = hvc_opal_privs[vtermno]; + + if (WARN_ON(!pv)) + return -ENODEV; + + return hvsilib_put_chars(&pv->hvsi, buf, count); +} + +static int hvc_opal_hvsi_open(struct hvc_struct *hp, int data) +{ + struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno]; + int rc; + + pr_devel("HVSI@%x: do open !\n", hp->vtermno); + + rc = notifier_add_irq(hp, data); + if (rc) + return rc; + + return hvsilib_open(&pv->hvsi, hp); +} + +static void hvc_opal_hvsi_close(struct hvc_struct *hp, int data) +{ + struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno]; + + pr_devel("HVSI@%x: do close !\n", hp->vtermno); + + hvsilib_close(&pv->hvsi, hp); + + notifier_del_irq(hp, data); +} + +void hvc_opal_hvsi_hangup(struct hvc_struct *hp, int data) +{ + struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno]; + + pr_devel("HVSI@%x: do hangup !\n", hp->vtermno); + + hvsilib_close(&pv->hvsi, hp); + + notifier_hangup_irq(hp, data); +} + +static int hvc_opal_hvsi_tiocmget(struct hvc_struct *hp) +{ + struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno]; + + if (!pv) + return -EINVAL; + return pv->hvsi.mctrl; +} + +static int hvc_opal_hvsi_tiocmset(struct hvc_struct *hp, unsigned int set, + unsigned int clear) +{ + struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno]; + + pr_devel("HVSI@%x: Set modem control, set=%x,clr=%x\n", + hp->vtermno, set, clear); + + if (set & TIOCM_DTR) + hvsilib_write_mctrl(&pv->hvsi, 1); + else if (clear & TIOCM_DTR) + hvsilib_write_mctrl(&pv->hvsi, 0); + + return 0; +} + +static const struct hv_ops hvc_opal_hvsi_ops = { + .get_chars = hvc_opal_hvsi_get_chars, + .put_chars = hvc_opal_hvsi_put_chars, + .notifier_add = hvc_opal_hvsi_open, + .notifier_del = hvc_opal_hvsi_close, + .notifier_hangup = hvc_opal_hvsi_hangup, + .tiocmget = hvc_opal_hvsi_tiocmget, + .tiocmset = hvc_opal_hvsi_tiocmset, +}; + +static int __devinit hvc_opal_probe(struct platform_device *dev) +{ + const struct hv_ops *ops; + struct hvc_struct *hp; + struct hvc_opal_priv *pv; + hv_protocol_t proto; + unsigned int termno, boot = 0; + const __be32 *reg; + + if (of_device_is_compatible(dev->dev.of_node, "ibm,opal-console-raw")) { + proto = HV_PROTOCOL_RAW; + ops = &hvc_opal_raw_ops; + } else if (of_device_is_compatible(dev->dev.of_node, + "ibm,opal-console-hvsi")) { + proto = HV_PROTOCOL_HVSI; + ops = &hvc_opal_hvsi_ops; + } else { + pr_err("hvc_opal: Unkown protocol for %s\n", + dev->dev.of_node->full_name); + return -ENXIO; + } + + reg = of_get_property(dev->dev.of_node, "reg", NULL); + termno = reg ? be32_to_cpup(reg) : 0; + + /* Is it our boot one ? */ + if (hvc_opal_privs[termno] == &hvc_opal_boot_priv) { + pv = hvc_opal_privs[termno]; + boot = 1; + } else if (hvc_opal_privs[termno] == NULL) { + pv = kzalloc(sizeof(struct hvc_opal_priv), GFP_KERNEL); + if (!pv) + return -ENOMEM; + pv->proto = proto; + hvc_opal_privs[termno] = pv; + if (proto == HV_PROTOCOL_HVSI) + hvsilib_init(&pv->hvsi, opal_get_chars, opal_put_chars, + termno, 0); + + /* Instanciate now to establish a mapping index==vtermno */ + hvc_instantiate(termno, termno, ops); + } else { + pr_err("hvc_opal: Device %s has duplicate terminal number #%d\n", + dev->dev.of_node->full_name, termno); + return -ENXIO; + } + + pr_info("hvc%d: %s protocol on %s%s\n", termno, + proto == HV_PROTOCOL_RAW ? "raw" : "hvsi", + dev->dev.of_node->full_name, + boot ? " (boot console)" : ""); + + /* We don't do IRQ yet */ + hp = hvc_alloc(termno, 0, ops, MAX_VIO_PUT_CHARS); + if (IS_ERR(hp)) + return PTR_ERR(hp); + dev_set_drvdata(&dev->dev, hp); + + return 0; +} + +static int __devexit hvc_opal_remove(struct platform_device *dev) +{ + struct hvc_struct *hp = dev_get_drvdata(&dev->dev); + int rc, termno; + + termno = hp->vtermno; + rc = hvc_remove(hp); + if (rc == 0) { + if (hvc_opal_privs[termno] != &hvc_opal_boot_priv) + kfree(hvc_opal_privs[termno]); + hvc_opal_privs[termno] = NULL; + } + return rc; +} + +static struct platform_driver hvc_opal_driver = { + .probe = hvc_opal_probe, + .remove = __devexit_p(hvc_opal_remove), + .driver = { + .name = hvc_opal_name, + .owner = THIS_MODULE, + .of_match_table = hvc_opal_match, + } +}; + +static int __init hvc_opal_init(void) +{ + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return -ENODEV; + + /* Register as a vio device to receive callbacks */ + return platform_driver_register(&hvc_opal_driver); +} +module_init(hvc_opal_init); + +static void __exit hvc_opal_exit(void) +{ + platform_driver_unregister(&hvc_opal_driver); +} +module_exit(hvc_opal_exit); + +static void udbg_opal_putc(char c) +{ + unsigned int termno = hvc_opal_boot_termno; + int count = -1; + + if (c == '\n') + udbg_opal_putc('\r'); + + do { + switch(hvc_opal_boot_priv.proto) { + case HV_PROTOCOL_RAW: + count = opal_put_chars(termno, &c, 1); + break; + case HV_PROTOCOL_HVSI: + count = hvc_opal_hvsi_put_chars(termno, &c, 1); + break; + } + } while(count == 0 || count == -EAGAIN); +} + +static int udbg_opal_getc_poll(void) +{ + unsigned int termno = hvc_opal_boot_termno; + int rc = 0; + char c; + + switch(hvc_opal_boot_priv.proto) { + case HV_PROTOCOL_RAW: + rc = opal_get_chars(termno, &c, 1); + break; + case HV_PROTOCOL_HVSI: + rc = hvc_opal_hvsi_get_chars(termno, &c, 1); + break; + } + if (!rc) + return -1; + return c; +} + +static int udbg_opal_getc(void) +{ + int ch; + for (;;) { + ch = udbg_opal_getc_poll(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay=0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + +static void udbg_init_opal_common(void) +{ + udbg_putc = udbg_opal_putc; + udbg_getc = udbg_opal_getc; + udbg_getc_poll = udbg_opal_getc_poll; + tb_ticks_per_usec = 0x200; /* Make udelay not suck */ +} + +void __init hvc_opal_init_early(void) +{ + struct device_node *stdout_node = NULL; + const u32 *termno; + const char *name = NULL; + const struct hv_ops *ops; + u32 index; + + /* find the boot console from /chosen/stdout */ + if (of_chosen) + name = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (name) { + stdout_node = of_find_node_by_path(name); + if (!stdout_node) { + pr_err("hvc_opal: Failed to locate default console!\n"); + return; + } + } else { + struct device_node *opal, *np; + + /* Current OPAL takeover doesn't provide the stdout + * path, so we hard wire it + */ + opal = of_find_node_by_path("/ibm,opal/consoles"); + if (opal) + pr_devel("hvc_opal: Found consoles in new location\n"); + if (!opal) { + opal = of_find_node_by_path("/ibm,opal"); + if (opal) + pr_devel("hvc_opal: " + "Found consoles in old location\n"); + } + if (!opal) + return; + for_each_child_of_node(opal, np) { + if (!strcmp(np->name, "serial")) { + stdout_node = np; + break; + } + } + of_node_put(opal); + } + if (!stdout_node) + return; + termno = of_get_property(stdout_node, "reg", NULL); + index = termno ? *termno : 0; + if (index >= MAX_NR_HVC_CONSOLES) + return; + hvc_opal_privs[index] = &hvc_opal_boot_priv; + + /* Check the protocol */ + if (of_device_is_compatible(stdout_node, "ibm,opal-console-raw")) { + hvc_opal_boot_priv.proto = HV_PROTOCOL_RAW; + ops = &hvc_opal_raw_ops; + pr_devel("hvc_opal: Found RAW console\n"); + } + else if (of_device_is_compatible(stdout_node,"ibm,opal-console-hvsi")) { + hvc_opal_boot_priv.proto = HV_PROTOCOL_HVSI; + ops = &hvc_opal_hvsi_ops; + hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars, + opal_put_chars, index, 1); + /* HVSI, perform the handshake now */ + hvsilib_establish(&hvc_opal_boot_priv.hvsi); + pr_devel("hvc_opal: Found HVSI console\n"); + } else + goto out; + hvc_opal_boot_termno = index; + udbg_init_opal_common(); + add_preferred_console("hvc", index, NULL); + hvc_instantiate(index, index, ops); +out: + of_node_put(stdout_node); +} + +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL_RAW +void __init udbg_init_debug_opal(void) +{ + u32 index = CONFIG_PPC_EARLY_DEBUG_OPAL_VTERMNO; + hvc_opal_privs[index] = &hvc_opal_boot_priv; + hvc_opal_boot_priv.proto = HV_PROTOCOL_RAW; + hvc_opal_boot_termno = index; + udbg_init_opal_common(); +} +#endif /* CONFIG_PPC_EARLY_DEBUG_OPAL_RAW */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI +void __init udbg_init_debug_opal_hvsi(void) +{ + u32 index = CONFIG_PPC_EARLY_DEBUG_OPAL_VTERMNO; + hvc_opal_privs[index] = &hvc_opal_boot_priv; + hvc_opal_boot_termno = index; + udbg_init_opal_common(); + hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars, opal_put_chars, + index, 1); + hvsilib_establish(&hvc_opal_boot_priv.hvsi); +} +#endif /* CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI */ diff --cc include/linux/dmaengine.h index ace51af4369f,1ceff5ae9d31..75f53f874b24 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@@ -24,8 -24,11 +24,10 @@@ #include #include #include +#include + #include + #include -struct scatterlist; - /** * typedef dma_cookie_t - an opaque DMA cookie * diff --cc net/8021q/vlan_core.c index 163397f1fd5a,1f64cc9da1b0..f5ffc02729d6 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@@ -2,9 -2,10 +2,10 @@@ #include #include #include + #include #include "vlan.h" -bool vlan_do_receive(struct sk_buff **skbp) +bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) { struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;