Btrfs: Add writepages support
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index e081558d52ff1f832848011962b5eea769cb05b6..b0677c84bb751fb7a0838860117fe74c61274d5c 100644
@@ -8,6 +8,9 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
+#include <linux/swap.h>
+#include <linux/version.h>
+#include <linux/writeback.h>
 #include "extent_map.h"
 
 /* temporary define until extent_map moves out of btrfs */
@@ -20,14 +23,11 @@ static struct kmem_cache *extent_map_cache;
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
 
-static LIST_HEAD(extent_buffers);
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
-static spinlock_t extent_buffers_lock;
 static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
-static int nr_extent_buffers;
-#define MAX_EXTENT_BUFFER_CACHE 128
+#define BUFFER_LRU_MAX 64
 
 struct tree_entry {
        u64 start;
@@ -36,6 +36,12 @@ struct tree_entry {
        struct rb_node rb_node;
 };
 
+struct extent_page_data {
+       struct bio *bio;
+       struct extent_map_tree *tree;
+       get_extent_t *get_extent;
+};
+
 void __init extent_map_init(void)
 {
        extent_map_cache = btrfs_cache_create("extent_map",
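
[The new struct extent_page_data exists so one bio can be carried across consecutive page submissions: write_cache_pages() walks the dirty pages of a mapping and passes an opaque pointer through to its callback unchanged, and that pointer is where the partially built bio lives. The relevant contract, as declared in <linux/writeback.h> of this era:

	typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
				   void *data);

	int write_cache_pages(struct address_space *mapping,
			      struct writeback_control *wbc,
			      writepage_t writepage, void *data);

__extent_writepage() below matches writepage_t, and whoever owns the extent_page_data submits the final, still-pending bio after the walk finishes.]
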
@@ -47,20 +53,12 @@ void __init extent_map_init(void)
        extent_buffer_cache = btrfs_cache_create("extent_buffers",
                                            sizeof(struct extent_buffer), 0,
                                            NULL);
-       spin_lock_init(&extent_buffers_lock);
 }
 
 void __exit extent_map_exit(void)
 {
-       struct extent_buffer *eb;
        struct extent_state *state;
 
-       while (!list_empty(&extent_buffers)) {
-               eb = list_entry(extent_buffers.next,
-                               struct extent_buffer, list);
-               list_del(&eb->list);
-               kmem_cache_free(extent_buffer_cache, eb);
-       }
        while (!list_empty(&states)) {
                state = list_entry(states.next, struct extent_state, list);
                printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
@@ -68,14 +66,6 @@ void __exit extent_map_exit(void)
                kmem_cache_free(extent_state_cache, state);
 
        }
-       while (!list_empty(&buffers)) {
-               eb = list_entry(buffers.next,
-                               struct extent_buffer, leak_list);
-               printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr);
-               list_del(&eb->leak_list);
-               kmem_cache_free(extent_buffer_cache, eb);
-       }
-
 
        if (extent_map_cache)
                kmem_cache_destroy(extent_map_cache);
@@ -92,10 +82,25 @@ void extent_map_tree_init(struct extent_map_tree *tree,
        tree->state.rb_node = NULL;
        tree->ops = NULL;
        rwlock_init(&tree->lock);
+       spin_lock_init(&tree->lru_lock);
        tree->mapping = mapping;
+       INIT_LIST_HEAD(&tree->buffer_lru);
+       tree->lru_size = 0;
 }
 EXPORT_SYMBOL(extent_map_tree_init);
 
+void extent_map_tree_empty_lru(struct extent_map_tree *tree)
+{
+       struct extent_buffer *eb;
+       while(!list_empty(&tree->buffer_lru)) {
+               eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
+                               lru);
+               list_del(&eb->lru);
+               free_extent_buffer(eb);
+       }
+}
+EXPORT_SYMBOL(extent_map_tree_empty_lru);
+
 struct extent_map *alloc_extent_map(gfp_t mask)
 {
        struct extent_map *em;
@@ -265,7 +270,12 @@ int add_extent_mapping(struct extent_map_tree *tree,
                if (prev && prev->end + 1 == em->start &&
                    ((em->block_start == EXTENT_MAP_HOLE &&
                      prev->block_start == EXTENT_MAP_HOLE) ||
-                            (em->block_start == prev->block_end + 1))) {
+                    (em->block_start == EXTENT_MAP_INLINE &&
+                     prev->block_start == EXTENT_MAP_INLINE) ||
+                    (em->block_start == EXTENT_MAP_DELALLOC &&
+                     prev->block_start == EXTENT_MAP_DELALLOC) ||
+                    (em->block_start < EXTENT_MAP_DELALLOC - 1 &&
+                     em->block_start == prev->block_end + 1))) {
                        em->start = prev->start;
                        em->block_start = prev->block_start;
                        rb_erase(&prev->rb_node, &tree->map);
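
[The widened merge test only coalesces neighbors of the same kind: hole with hole, inline with inline, delalloc with delalloc, and real extents only when physically adjacent. block_start holds either a real disk location or one of the sentinel values parked at the top of the u64 range; a sketch of why the extra guard is needed, assuming the sentinel definitions from extent_map.h in this tree:

	#define EXTENT_MAP_DELALLOC ((u64)-1)	/* assumed values; see extent_map.h */
	#define EXTENT_MAP_INLINE   ((u64)-2)
	#define EXTENT_MAP_HOLE     ((u64)-3)

	/* anything below EXTENT_MAP_DELALLOC - 1 is a real block number, so
	 * the "em->block_start == prev->block_end + 1" adjacency test can no
	 * longer fire when block_start is itself one of the sentinels */

]
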
@@ -981,7 +991,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
        struct extent_state *state;
        int ret = 1;
 
-       write_lock_irq(&tree->lock);
+       read_lock_irq(&tree->lock);
        /*
         * this search will find all the extents that end after
         * our range starts.
@@ -993,7 +1003,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
 
        while(1) {
                state = rb_entry(node, struct extent_state, rb_node);
-               if (state->state & bits) {
+               if (state->end >= start && (state->state & bits)) {
                        *start_ret = state->start;
                        *end_ret = state->end;
                        ret = 0;
@@ -1004,7 +1014,7 @@ int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
                        break;
        }
 out:
-       write_unlock_irq(&tree->lock);
+       read_unlock_irq(&tree->lock);
        return ret;
 }
 EXPORT_SYMBOL(find_first_extent_bit);
@@ -1041,6 +1051,8 @@ search_again:
                        if (state->state & EXTENT_LOCKED) {
                                DEFINE_WAIT(wait);
                                atomic_inc(&state->refs);
+                               prepare_to_wait(&state->wq, &wait,
+                                               TASK_UNINTERRUPTIBLE);
                                write_unlock_irq(&tree->lock);
                                schedule();
                                write_lock_irq(&tree->lock);
@@ -1056,7 +1068,7 @@ search_again:
                node = rb_next(node);
                if (!node)
                        break;
-               total_bytes = state->end - state->start + 1;
+               total_bytes += state->end - state->start + 1;
                if (total_bytes >= max_bytes)
                        break;
        }
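
[Two fixes land in this hunk. total_bytes now accumulates across states instead of being overwritten each iteration, and prepare_to_wait() is called before the lock is dropped, which closes a classic lost-wakeup window: the waiter must already be on the wait queue when it releases the lock that the waker holds around wake_up(). The canonical ordering, as a sketch:

	DEFINE_WAIT(wait);

	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); /* queue first */
	write_unlock_irq(&tree->lock);  /* only now is it safe to drop the lock */
	schedule();                     /* a wake_up() issued after the unlock
					 * still finds us queued and wakes us */
	finish_wait(&state->wq, &wait);
	write_lock_irq(&tree->lock);

With the old ordering, a wake_up() arriving between the unlock and prepare_to_wait() found an empty queue, and the subsequent schedule() could sleep indefinitely.]
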
@@ -1231,7 +1243,7 @@ EXPORT_SYMBOL(test_range_bit);
 static int check_page_uptodate(struct extent_map_tree *tree,
                               struct page *page)
 {
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
                SetPageUptodate(page);
@@ -1245,7 +1257,7 @@ static int check_page_uptodate(struct extent_map_tree *tree,
 static int check_page_locked(struct extent_map_tree *tree,
                             struct page *page)
 {
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
                unlock_page(page);
@@ -1259,7 +1271,7 @@ static int check_page_locked(struct extent_map_tree *tree,
 static int check_page_writeback(struct extent_map_tree *tree,
                             struct page *page)
 {
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
                end_page_writeback(page);
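
[The (u64) casts that repeat through this patch are a 32-bit correctness fix: page->index is a pgoff_t (an unsigned long, 32 bits on 32-bit kernels), so the shift used to be evaluated in 32-bit arithmetic and wrapped for file offsets at or beyond 4GB. A worked example with 4K pages (PAGE_CACHE_SHIFT == 12):

	unsigned long index = 0x100000;             /* page 1M == byte offset 4GB */
	u64 wrong = index << PAGE_CACHE_SHIFT;      /* 32-bit shift wraps to 0    */
	u64 right = (u64)index << PAGE_CACHE_SHIFT; /* 0x100000000, as intended   */

The widening must happen before the shift; assigning the 32-bit result to a u64 afterwards is too late.]
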
@@ -1277,8 +1289,12 @@ static int check_page_writeback(struct extent_map_tree *tree,
  * Scheduling is not allowed, so the extent state tree is expected
  * to have one and only one object corresponding to this IO.
  */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_writepage(struct bio *bio, int err)
+#else
 static int end_bio_extent_writepage(struct bio *bio,
                                   unsigned int bytes_done, int err)
+#endif
 {
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
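
[The LINUX_VERSION_CODE conditionals track a block-layer API change: starting with 2.6.24, bio completion handlers are called exactly once, so bi_end_io lost its bytes_done argument and its int return. On older kernels the handler could be invoked for partial completions and had to wait for the final one:

	/* pre-2.6.24 semantics: bi_size counts bytes still outstanding;
	 * returning 1 means "call me again", and the real completion work
	 * only runs once bi_size has dropped to zero */
	if (bio->bi_size)
		return 1;

The same pattern repeats verbatim in the readpage and prepare_write completion handlers below.]
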
@@ -1287,12 +1303,15 @@ static int end_bio_extent_writepage(struct bio *bio,
        u64 end;
        int whole_page;
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        if (bio->bi_size)
                return 1;
+#endif
 
        do {
                struct page *page = bvec->bv_page;
-               start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
+               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+                        bvec->bv_offset;
                end = start + bvec->bv_len - 1;
 
                if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@@ -1319,7 +1338,9 @@ static int end_bio_extent_writepage(struct bio *bio,
        } while (bvec >= bio->bi_io_vec);
 
        bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        return 0;
+#endif
 }
 
 /*
@@ -1333,8 +1354,12 @@ static int end_bio_extent_writepage(struct bio *bio,
  * Scheduling is not allowed, so the extent state tree is expected
  * to have one and only one object corresponding to this IO.
  */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_readpage(struct bio *bio, int err)
+#else
 static int end_bio_extent_readpage(struct bio *bio,
                                   unsigned int bytes_done, int err)
+#endif
 {
        int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -1344,12 +1369,15 @@ static int end_bio_extent_readpage(struct bio *bio,
        int whole_page;
        int ret;
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        if (bio->bi_size)
                return 1;
+#endif
 
        do {
                struct page *page = bvec->bv_page;
-               start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
+               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+                       bvec->bv_offset;
                end = start + bvec->bv_len - 1;
 
                if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@@ -1385,7 +1413,9 @@ static int end_bio_extent_readpage(struct bio *bio,
        } while (bvec >= bio->bi_io_vec);
 
        bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        return 0;
+#endif
 }
 
 /*
@@ -1393,8 +1423,12 @@ static int end_bio_extent_readpage(struct bio *bio,
  * the structs in the extent tree when done, and set the uptodate bits
  * as appropriate.
  */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_preparewrite(struct bio *bio, int err)
+#else
 static int end_bio_extent_preparewrite(struct bio *bio,
                                       unsigned int bytes_done, int err)
+#endif
 {
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -1402,12 +1436,15 @@ static int end_bio_extent_preparewrite(struct bio *bio,
        u64 start;
        u64 end;
 
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        if (bio->bi_size)
                return 1;
+#endif
 
        do {
                struct page *page = bvec->bv_page;
-               start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
+               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+                       bvec->bv_offset;
                end = start + bvec->bv_len - 1;
 
                if (--bvec >= bio->bi_io_vec)
@@ -1425,49 +1462,87 @@ static int end_bio_extent_preparewrite(struct bio *bio,
        } while (bvec >= bio->bi_io_vec);
 
        bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
        return 0;
+#endif
 }
 
-static int submit_extent_page(int rw, struct extent_map_tree *tree,
-                             struct page *page, sector_t sector,
-                             size_t size, unsigned long offset,
-                             struct block_device *bdev,
-                             bio_end_io_t end_io_func)
+static struct bio *
+extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+                gfp_t gfp_flags)
 {
        struct bio *bio;
-       int ret = 0;
 
-       bio = bio_alloc(GFP_NOIO, 1);
+       bio = bio_alloc(gfp_flags, nr_vecs);
 
-       bio->bi_sector = sector;
-       bio->bi_bdev = bdev;
-       bio->bi_io_vec[0].bv_page = page;
-       bio->bi_io_vec[0].bv_len = size;
-       bio->bi_io_vec[0].bv_offset = offset;
-
-       bio->bi_vcnt = 1;
-       bio->bi_idx = 0;
-       bio->bi_size = size;
+       if (bio == NULL && (current->flags & PF_MEMALLOC)) {
+               while (!bio && (nr_vecs /= 2))
+                       bio = bio_alloc(gfp_flags, nr_vecs);
+       }
 
-       bio->bi_end_io = end_io_func;
-       bio->bi_private = tree;
+       if (bio) {
+               bio->bi_bdev = bdev;
+               bio->bi_sector = first_sector;
+       }
+       return bio;
+}
 
+static int submit_one_bio(int rw, struct bio *bio)
+{
+       int ret = 0;
        bio_get(bio);
        submit_bio(rw, bio);
-
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
-
        bio_put(bio);
        return ret;
 }
 
+static int submit_extent_page(int rw, struct extent_map_tree *tree,
+                             struct page *page, sector_t sector,
+                             size_t size, unsigned long offset,
+                             struct block_device *bdev,
+                             struct bio **bio_ret,
+                             int max_pages,
+                             bio_end_io_t end_io_func)
+{
+       int ret = 0;
+       struct bio *bio;
+       int nr;
+
+       if (bio_ret && *bio_ret) {
+               bio = *bio_ret;
+               if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
+                   bio_add_page(bio, page, size, offset) < size) {
+                       ret = submit_one_bio(rw, bio);
+                       bio = NULL;
+               } else {
+                       return 0;
+               }
+       }
+       nr = min(max_pages, bio_get_nr_vecs(bdev));
+       bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+       if (!bio) {
+               printk("failed to allocate bio nr %d\n", nr);
+       }
+       bio_add_page(bio, page, size, offset);
+       bio->bi_end_io = end_io_func;
+       bio->bi_private = tree;
+       if (bio_ret) {
+               *bio_ret = bio;
+       } else {
+               ret = submit_one_bio(rw, bio);
+       }
+
+       return ret;
+}
+
 void set_page_extent_mapped(struct page *page)
 {
        if (!PagePrivate(page)) {
                SetPagePrivate(page);
                WARN_ON(!page->mapping->a_ops->invalidatepage);
-               set_page_private(page, 1);
+               set_page_private(page, EXTENT_PAGE_PRIVATE);
                page_cache_get(page);
        }
 }
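
[This is the heart of the I/O batching. submit_extent_page() now keeps a bio under construction in *bio_ret: if the incoming page is physically contiguous with the pending bio (checked against bi_sector + (bi_size >> 9)) it is appended via bio_add_page(); otherwise the pending bio is submitted and a fresh one is allocated, sized by min(max_pages, bio_get_nr_vecs(bdev)), with a fallback that halves nr_vecs under memory pressure. Callers passing a NULL bio_ret (readpage, prepare_write) keep the old submit-one-page-immediately behaviour. The caller-side contract, as a sketch:

	struct bio *bio = NULL;

	/* each call either appends the page to 'bio' or submits the
	 * full bio and starts a new one */
	submit_extent_page(WRITE, tree, page, sector, iosize, page_offset,
			   bdev, &bio, nr, end_bio_extent_writepage);

	/* ... more pages ... */

	/* the last bio stays pending in 'bio'; the owner must flush it */
	if (bio)
		submit_one_bio(WRITE, bio);

Note that the extent_bio_alloc() failure path only prints a warning: bio stays NULL and the following bio_add_page() would oops, so the allocation is effectively treated as must-succeed.]
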
@@ -1481,7 +1556,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
                          get_extent_t *get_extent)
 {
        struct inode *inode = page->mapping->host;
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 page_end = start + PAGE_CACHE_SIZE - 1;
        u64 end;
        u64 cur = start;
@@ -1558,7 +1633,8 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
                if (!ret) {
                        ret = submit_extent_page(READ, tree, page,
                                                 sector, iosize, page_offset,
-                                                bdev, end_bio_extent_readpage);
+                                                bdev, NULL, 1,
+                                                end_bio_extent_readpage);
                }
                if (ret)
                        SetPageError(page);
@@ -1581,25 +1657,26 @@ EXPORT_SYMBOL(extent_read_full_page);
  * are found, they are marked writeback.  Then the lock bits are removed
  * and the end_io handler clears the writeback ranges
  */
-int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
-                         get_extent_t *get_extent,
-                         struct writeback_control *wbc)
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+                             void *data)
 {
        struct inode *inode = page->mapping->host;
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       struct extent_page_data *epd = data;
+       struct extent_map_tree *tree = epd->tree;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 page_end = start + PAGE_CACHE_SIZE - 1;
        u64 end;
        u64 cur = start;
        u64 extent_offset;
        u64 last_byte = i_size_read(inode);
        u64 block_start;
+       u64 iosize;
        sector_t sector;
        struct extent_map *em;
        struct block_device *bdev;
        int ret;
        int nr = 0;
        size_t page_offset = 0;
-       size_t iosize;
        size_t blocksize;
        loff_t i_size = i_size_read(inode);
        unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -1659,7 +1736,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                        clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
                        break;
                }
-               em = get_extent(inode, page, page_offset, cur, end, 0);
+               em = epd->get_extent(inode, page, page_offset, cur, end, 1);
                if (IS_ERR(em) || !em) {
                        SetPageError(page);
                        break;
@@ -1702,9 +1779,12 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                if (ret)
                        SetPageError(page);
                else {
+                       unsigned long nr = end_index + 1;
                        set_range_writeback(tree, cur, cur + iosize - 1);
+
                        ret = submit_extent_page(WRITE, tree, page, sector,
                                                 iosize, page_offset, bdev,
+                                                &epd->bio, nr,
                                                 end_bio_extent_writepage);
                        if (ret)
                                SetPageError(page);
@@ -1718,8 +1798,44 @@ done:
        unlock_page(page);
        return 0;
 }
+
+int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
+                         get_extent_t *get_extent,
+                         struct writeback_control *wbc)
+{
+       int ret;
+       struct extent_page_data epd = {
+               .bio = NULL,
+               .tree = tree,
+               .get_extent = get_extent,
+       };
+
+       ret = __extent_writepage(page, wbc, &epd);
+       if (epd.bio)
+               submit_one_bio(WRITE, epd.bio);
+       return ret;
+}
 EXPORT_SYMBOL(extent_write_full_page);
 
+int extent_writepages(struct extent_map_tree *tree,
+                     struct address_space *mapping,
+                     get_extent_t *get_extent,
+                     struct writeback_control *wbc)
+{
+       int ret;
+       struct extent_page_data epd = {
+               .bio = NULL,
+               .tree = tree,
+               .get_extent = get_extent,
+       };
+
+       ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
+       if (epd.bio)
+               submit_one_bio(WRITE, epd.bio);
+       return ret;
+}
+EXPORT_SYMBOL(extent_writepages);
+
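
[extent_writepages() is the function that gives the patch its name: it drives write_cache_pages() over the whole mapping with __extent_writepage() as the callback, then flushes whatever bio is still pending in epd. On the btrfs side it would be wired into the inode's address_space operations roughly like this (a sketch; the btrfs_writepages name and the aops wiring are assumptions based on the existing read/write hooks):

	static int btrfs_writepages(struct address_space *mapping,
				    struct writeback_control *wbc)
	{
		struct extent_map_tree *tree;

		tree = &BTRFS_I(mapping->host)->extent_tree;
		return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
	}

	/* in btrfs_aops: */
	.writepage	= btrfs_writepage,
	.writepages	= btrfs_writepages,

Compared with the page-at-a-time .writepage path, this lets background writeback build large, contiguous bios across many dirty pages.]
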
 /*
  * basic invalidatepage code, this waits on any locked or writeback
  * ranges corresponding to the page, and then deletes any extent state
@@ -1728,7 +1844,7 @@ EXPORT_SYMBOL(extent_write_full_page);
 int extent_invalidatepage(struct extent_map_tree *tree,
                          struct page *page, unsigned long offset)
 {
-       u64 start = (page->index << PAGE_CACHE_SHIFT);
+       u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
        u64 end = start + PAGE_CACHE_SIZE - 1;
        size_t blocksize = page->mapping->host->i_sb->s_blocksize;
 
@@ -1770,7 +1886,7 @@ int extent_prepare_write(struct extent_map_tree *tree,
                         struct inode *inode, struct page *page,
                         unsigned from, unsigned to, get_extent_t *get_extent)
 {
-       u64 page_start = page->index << PAGE_CACHE_SHIFT;
+       u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
        u64 block_start;
        u64 orig_block_start;
@@ -1837,6 +1953,7 @@ int extent_prepare_write(struct extent_map_tree *tree,
                                       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
                        ret = submit_extent_page(READ, tree, page,
                                         sector, iosize, page_offset, em->bdev,
+                                        NULL, 1,
                                         end_bio_extent_preparewrite);
                        iocount++;
                        block_start = block_start + iosize;
@@ -1868,7 +1985,7 @@ EXPORT_SYMBOL(extent_prepare_write);
 int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
 {
        struct extent_map *em;
-       u64 start = page->index << PAGE_CACHE_SHIFT;
+       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        u64 orig_start = start;
        int ret = 1;
@@ -1902,6 +2019,7 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
        struct inode *inode = mapping->host;
        u64 start = iblock << inode->i_blkbits;
        u64 end = start + (1 << inode->i_blkbits) - 1;
+       sector_t sector = 0;
        struct extent_map *em;
 
        em = get_extent(inode, NULL, 0, start, end, 0);
@@ -1910,90 +2028,147 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
 
        if (em->block_start == EXTENT_MAP_INLINE ||
            em->block_start == EXTENT_MAP_HOLE)
-               return 0;
+               goto out;
+
+       sector = (em->block_start + start - em->start) >> inode->i_blkbits;
+out:
+       free_extent_map(em);
+       return sector;
+}
+
+static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
+{
+       if (list_empty(&eb->lru)) {
+               extent_buffer_get(eb);
+               list_add(&eb->lru, &tree->buffer_lru);
+               tree->lru_size++;
+               if (tree->lru_size >= BUFFER_LRU_MAX) {
+                       struct extent_buffer *rm;
+                       rm = list_entry(tree->buffer_lru.prev,
+                                       struct extent_buffer, lru);
+                       tree->lru_size--;
+                       list_del(&rm->lru);
+                       free_extent_buffer(rm);
+               }
+       } else
+               list_move(&eb->lru, &tree->buffer_lru);
+       return 0;
+}
+static struct extent_buffer *find_lru(struct extent_map_tree *tree,
+                                     u64 start, unsigned long len)
+{
+       struct list_head *lru = &tree->buffer_lru;
+       struct list_head *cur = lru->next;
+       struct extent_buffer *eb;
+
+       if (list_empty(lru))
+               return NULL;
 
-       return (em->block_start + start - em->start) >> inode->i_blkbits;
+       do {
+               eb = list_entry(cur, struct extent_buffer, lru);
+               if (eb->start == start && eb->len == len) {
+                       extent_buffer_get(eb);
+                       return eb;
+               }
+               cur = cur->next;
+       } while (cur != lru);
+       return NULL;
+}
+
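
[These two helpers replace the global extent_buffer freelist that the patch removes. Each tree keeps a small per-tree LRU (BUFFER_LRU_MAX, 64 entries): add_lru() takes a reference when it inserts and drops one when it evicts from the cold end, so a recently used buffer survives even after all external users release it, and find_lru() is a linear scan keyed on (start, len). The visible effect, as a hypothetical usage (assuming a 4K tree block at offset 4096):

	struct extent_buffer *a, *b;

	a = alloc_extent_buffer(tree, 4096, 4096, NULL, GFP_NOFS);
	b = alloc_extent_buffer(tree, 4096, 4096, NULL, GFP_NOFS);
	/* b == a: the second call hit find_lru() and only took a reference;
	 * EXTENT_BUFFER_FILLED short-circuits the page lookups below */
	free_extent_buffer(a);
	free_extent_buffer(b);	/* buffer stays alive: the LRU holds a ref */

]
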
+static inline unsigned long num_extent_pages(u64 start, u64 len)
+{
+       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+               (start >> PAGE_CACHE_SHIFT);
 }
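
[num_extent_pages() centralizes the page-count arithmetic that was open-coded at every call site: round the exclusive end up to a page boundary, then subtract the index of the first page. For example, with 4K pages a 4K buffer starting 1K into a page straddles two pages:

	/* start = 1024, len = 4096, PAGE_CACHE_SHIFT = 12 */
	((1024 + 4096 + 4095) >> 12) - (1024 >> 12) == 2 - 0 == 2

while a page-aligned 4K buffer gives ((0 + 4096 + 4095) >> 12) - 0 == 1.]
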
 
-static struct extent_buffer *__alloc_extent_buffer(gfp_t mask)
+static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+                                             unsigned long i)
+{
+       struct page *p;
+       struct address_space *mapping;
+
+       if (i == 0)
+               return eb->first_page;
+       i += eb->start >> PAGE_CACHE_SHIFT;
+       mapping = eb->first_page->mapping;
+       read_lock_irq(&mapping->tree_lock);
+       p = radix_tree_lookup(&mapping->page_tree, i);
+       read_unlock_irq(&mapping->tree_lock);
+       return p;
+}
+
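
[extent_buffer_page() stores a pointer only to the buffer's first page; every other page is fetched straight out of the page-cache radix tree at index (eb->start >> PAGE_CACHE_SHIFT) + i. No reference is taken on the returned page because the extent_buffer already holds one per page from allocation time, which is also why the removed helper could drop its reference immediately:

	/* the old find_get_page() variant, shown for contrast: the get/put
	 * pair was pure overhead, since the buffer's own page references
	 * keep the page alive anyway */
	p = find_get_page(eb->first_page->mapping, i);
	page_cache_release(p);
	return p;

The radix walk under tree_lock returns the same page without touching its reference count.]
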
+static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
+                                                  u64 start,
+                                                  unsigned long len,
+                                                  gfp_t mask)
 {
        struct extent_buffer *eb = NULL;
 
-       spin_lock(&extent_buffers_lock);
-       if (!list_empty(&extent_buffers)) {
-               eb = list_entry(extent_buffers.next, struct extent_buffer,
-                               list);
-               list_del(&eb->list);
-               WARN_ON(nr_extent_buffers == 0);
-               nr_extent_buffers--;
+       spin_lock(&tree->lru_lock);
+       eb = find_lru(tree, start, len);
+       if (eb) {
+               goto lru_add;
        }
-       spin_unlock(&extent_buffers_lock);
+       spin_unlock(&tree->lru_lock);
 
        if (eb) {
                memset(eb, 0, sizeof(*eb));
        } else {
                eb = kmem_cache_zalloc(extent_buffer_cache, mask);
        }
-       spin_lock(&extent_buffers_lock);
-       list_add(&eb->leak_list, &buffers);
-       spin_unlock(&extent_buffers_lock);
+       INIT_LIST_HEAD(&eb->lru);
+       eb->start = start;
+       eb->len = len;
+       atomic_set(&eb->refs, 1);
 
+       spin_lock(&tree->lru_lock);
+lru_add:
+       add_lru(tree, eb);
+       spin_unlock(&tree->lru_lock);
        return eb;
 }
 
 static void __free_extent_buffer(struct extent_buffer *eb)
 {
-
-       spin_lock(&extent_buffers_lock);
-       list_del_init(&eb->leak_list);
-       spin_unlock(&extent_buffers_lock);
-
-       if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) {
-               kmem_cache_free(extent_buffer_cache, eb);
-       } else {
-               spin_lock(&extent_buffers_lock);
-               list_add(&eb->list, &extent_buffers);
-               nr_extent_buffers++;
-               spin_unlock(&extent_buffers_lock);
-       }
-}
-
-static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i)
-{
-       struct page *p;
-       if (i == 0)
-               return eb->first_page;
-       i += eb->start >> PAGE_CACHE_SHIFT;
-       p = find_get_page(eb->first_page->mapping, i);
-       page_cache_release(p);
-       return p;
+       kmem_cache_free(extent_buffer_cache, eb);
 }
 
 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
                                          u64 start, unsigned long len,
+                                         struct page *page0,
                                          gfp_t mask)
 {
-       unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
-                                 (start >> PAGE_CACHE_SHIFT) + 1;
+       unsigned long num_pages = num_extent_pages(start, len);
        unsigned long i;
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        struct extent_buffer *eb;
        struct page *p;
        struct address_space *mapping = tree->mapping;
-       int uptodate = 0;
+       int uptodate = 1;
 
-       eb = __alloc_extent_buffer(mask);
+       eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb || IS_ERR(eb))
                return NULL;
 
-       eb->alloc_addr = __builtin_return_address(0);
-       eb->start = start;
-       eb->len = len;
-       atomic_set(&eb->refs, 1);
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               return eb;
 
-       for (i = 0; i < num_pages; i++, index++) {
+       if (page0) {
+               eb->first_page = page0;
+               i = 1;
+               index++;
+               page_cache_get(page0);
+               mark_page_accessed(page0);
+               set_page_extent_mapped(page0);
+               set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
+                                len << 2);
+       } else {
+               i = 0;
+       }
+       for (; i < num_pages; i++, index++) {
                p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
                if (!p) {
+                       WARN_ON(1);
                        /* make sure the free only frees the pages we've
                         * grabbed a reference on
                         */
@@ -2002,14 +2177,21 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
                        goto fail;
                }
                set_page_extent_mapped(p);
-               if (i == 0)
+               mark_page_accessed(p);
+               if (i == 0) {
                        eb->first_page = p;
+                       set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
+                                        len << 2);
+               } else {
+                       set_page_private(p, EXTENT_PAGE_PRIVATE);
+               }
                if (!PageUptodate(p))
                        uptodate = 0;
                unlock_page(p);
        }
        if (uptodate)
                eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
        return eb;
 fail:
        free_extent_buffer(eb);
@@ -2021,25 +2203,22 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
                                         u64 start, unsigned long len,
                                          gfp_t mask)
 {
-       unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
-                                 (start >> PAGE_CACHE_SHIFT) + 1;
-       unsigned long i;
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
+       unsigned long num_pages = num_extent_pages(start, len);
+       unsigned long i;
+       unsigned long index = start >> PAGE_CACHE_SHIFT;
        struct extent_buffer *eb;
        struct page *p;
        struct address_space *mapping = tree->mapping;
+       int uptodate = 1;
 
-       eb = __alloc_extent_buffer(mask);
+       eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb || IS_ERR(eb))
                return NULL;
 
-       eb->alloc_addr = __builtin_return_address(0);
-       eb->start = start;
-       eb->len = len;
-       atomic_set(&eb->refs, 1);
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               return eb;
 
        for (i = 0; i < num_pages; i++, index++) {
-               p = find_get_page(mapping, index);
+               p = find_lock_page(mapping, index);
                if (!p) {
                        /* make sure the free only frees the pages we've
                         * grabbed a reference on
@@ -2049,9 +2228,23 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
                        goto fail;
                }
                set_page_extent_mapped(p);
-               if (i == 0)
+               mark_page_accessed(p);
+
+               if (i == 0) {
                        eb->first_page = p;
+                       set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
+                                        len << 2);
+               } else {
+                       set_page_private(p, EXTENT_PAGE_PRIVATE);
+               }
+
+               if (!PageUptodate(p))
+                       uptodate = 0;
+               unlock_page(p);
        }
+       if (uptodate)
+               eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
        return eb;
 fail:
        free_extent_buffer(eb);
@@ -2070,12 +2263,9 @@ void free_extent_buffer(struct extent_buffer *eb)
        if (!atomic_dec_and_test(&eb->refs))
                return;
 
-       num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
-               (eb->start >> PAGE_CACHE_SHIFT) + 1;
+       num_pages = num_extent_pages(eb->start, eb->len);
 
-       if (eb->first_page)
-               page_cache_release(eb->first_page);
-       for (i = 1; i < num_pages; i++) {
+       for (i = 0; i < num_pages; i++) {
                page_cache_release(extent_buffer_page(eb, i));
        }
        __free_extent_buffer(eb);
@@ -2094,8 +2284,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree,
        u64 end = start + eb->len - 1;
 
        set = clear_extent_dirty(tree, start, end, GFP_NOFS);
-       num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
-               (eb->start >> PAGE_CACHE_SHIFT) + 1;
+       num_pages = num_extent_pages(eb->start, eb->len);
 
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
@@ -2107,8 +2296,8 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree,
                 */
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
                    ((i == num_pages - 1) &&
-                    ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
-                       start = page->index << PAGE_CACHE_SHIFT;
+                    ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
+                       start = (u64)page->index << PAGE_CACHE_SHIFT;
                        end  = start + PAGE_CACHE_SIZE - 1;
                        if (test_range_bit(tree, start, end,
                                           EXTENT_DIRTY, 0)) {
@@ -2134,7 +2323,29 @@ EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
 int set_extent_buffer_dirty(struct extent_map_tree *tree,
                             struct extent_buffer *eb)
 {
-       return set_range_dirty(tree, eb->start, eb->start + eb->len - 1);
+       unsigned long i;
+       unsigned long num_pages;
+
+       num_pages = num_extent_pages(eb->start, eb->len);
+       for (i = 0; i < num_pages; i++) {
+               struct page *page = extent_buffer_page(eb, i);
+               /* writepage may need to do something special for the
+                * first page, we have to make sure page->private is
+                * properly set.  releasepage may drop page->private
+                * on us if the page isn't already dirty.
+                */
+               if (i == 0) {
+                       lock_page(page);
+                       set_page_private(page,
+                                        EXTENT_PAGE_PRIVATE_FIRST_PAGE |
+                                        eb->len << 2);
+               }
+               __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
+               if (i == 0)
+                       unlock_page(page);
+       }
+       return set_extent_dirty(tree, eb->start,
+                               eb->start + eb->len - 1, GFP_NOFS);
 }
 EXPORT_SYMBOL(set_extent_buffer_dirty);
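
[page->private is now a tagged word rather than a plain flag. Decoding the "len << 2" packing, and assuming the constant values from extent_map.h in this tree: bit 0 marks any page managed by the extent code, bits 0-1 together mark the first page of an extent_buffer, and the buffer length rides in the remaining bits so writepage can recover the buffer's extent from the page alone:

	#define EXTENT_PAGE_PRIVATE 1			/* assumed; see extent_map.h */
	#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3

	unsigned long len;
	unsigned long priv = page_private(page);

	if ((priv & EXTENT_PAGE_PRIVATE_FIRST_PAGE) == EXTENT_PAGE_PRIVATE_FIRST_PAGE)
		len = priv >> 2;	/* first page of an extent_buffer */
	else if (priv & EXTENT_PAGE_PRIVATE)
		len = 0;		/* ordinary extent-mapped data page */

The lock_page() around the re-stamp matters because, per the comment above, releasepage may strip page->private from a page that is not yet dirty.]
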
 
@@ -2145,8 +2356,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree,
        struct page *page;
        unsigned long num_pages;
 
-       num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
-               (eb->start >> PAGE_CACHE_SHIFT) + 1;
+       num_pages = num_extent_pages(eb->start, eb->len);
 
        set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
                            GFP_NOFS);
@@ -2154,7 +2364,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree,
                page = extent_buffer_page(eb, i);
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
                    ((i == num_pages - 1) &&
-                    ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
+                    ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
                        check_page_uptodate(tree, page);
                        continue;
                }
@@ -2175,9 +2385,12 @@ int extent_buffer_uptodate(struct extent_map_tree *tree,
 EXPORT_SYMBOL(extent_buffer_uptodate);
 
 int read_extent_buffer_pages(struct extent_map_tree *tree,
-                            struct extent_buffer *eb, int wait)
+                            struct extent_buffer *eb,
+                            u64 start,
+                            int wait)
 {
        unsigned long i;
+       unsigned long start_i;
        struct page *page;
        int err;
        int ret = 0;
@@ -2186,14 +2399,20 @@ int read_extent_buffer_pages(struct extent_map_tree *tree,
        if (eb->flags & EXTENT_UPTODATE)
                return 0;
 
-       if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+       if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
                           EXTENT_UPTODATE, 1)) {
                return 0;
        }
+       if (start) {
+               WARN_ON(start < eb->start);
+               start_i = (start >> PAGE_CACHE_SHIFT) -
+                       (eb->start >> PAGE_CACHE_SHIFT);
+       } else {
+               start_i = 0;
+       }
 
-       num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
-               (eb->start >> PAGE_CACHE_SHIFT) + 1;
-       for (i = 0; i < num_pages; i++) {
+       num_pages = num_extent_pages(eb->start, eb->len);
+       for (i = start_i; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if (PageUptodate(page)) {
                        continue;
@@ -2219,14 +2438,15 @@ int read_extent_buffer_pages(struct extent_map_tree *tree,
                return ret;
        }
 
-       for (i = 0; i < num_pages; i++) {
+       for (i = start_i; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                wait_on_page_locked(page);
                if (!PageUptodate(page)) {
                        ret = -EIO;
                }
        }
-       eb->flags |= EXTENT_UPTODATE;
+       if (!ret)
+               eb->flags |= EXTENT_UPTODATE;
        return ret;
 }
 EXPORT_SYMBOL(read_extent_buffer_pages);
@@ -2242,22 +2462,25 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
        char *dst = (char *)dstv;
        size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
        unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+       unsigned long num_pages = num_extent_pages(eb->start, eb->len);
 
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
+               if (!PageUptodate(page)) {
+                       printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
+                       WARN_ON(1);
+               }
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = kmap_atomic(page, KM_USER1);
                memcpy(dst, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
+               kunmap_atomic(kaddr, KM_USER1);
 
                dst += cur;
                len -= cur;
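
[The KM_USER0 to KM_USER1 switch here (and in write_extent_buffer() below) is about nested mappings: kmap_atomic() slots are per-CPU, and two live atomic mappings on the same CPU must use distinct slots or the second clobbers the first. copy_extent_buffer() maps its destination page with KM_USER0 and, while that mapping is live, calls read_extent_buffer(), which maps the source page with KM_USER1:

	kaddr = kmap_atomic(page, KM_USER0);		/* destination mapping   */
	read_extent_buffer(src, kaddr + offset,		/* maps src via KM_USER1 */
			   src_offset, cur);
	kunmap_atomic(kaddr, KM_USER0);

Had read_extent_buffer() kept KM_USER0, the inner kmap_atomic() would silently remap the slot out from under the copy.]
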
@@ -2267,37 +2490,67 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 }
 EXPORT_SYMBOL(read_extent_buffer);
 
-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                     unsigned long min_len,
-                     char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km)
+int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
+                              unsigned long min_len, char **token, char **map,
+                              unsigned long *map_start,
+                              unsigned long *map_len, int km)
 {
        size_t offset = start & (PAGE_CACHE_SIZE - 1);
        char *kaddr;
+       struct page *p;
        size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
        unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-       unsigned long end_i = (start_offset + start + min_len) >>
-                               PAGE_CACHE_SHIFT;
+       unsigned long end_i = (start_offset + start + min_len - 1) >>
+               PAGE_CACHE_SHIFT;
 
        if (i != end_i)
                return -EINVAL;
 
-       WARN_ON(start > eb->len);
-
        if (i == 0) {
                offset = start_offset;
                *map_start = 0;
        } else {
+               offset = 0;
                *map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
        }
+       if (start + min_len > eb->len) {
+printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
+               WARN_ON(1);
+       }
 
-       kaddr = kmap_atomic(extent_buffer_page(eb, i), km);
+       p = extent_buffer_page(eb, i);
+       WARN_ON(!PageUptodate(p));
+       kaddr = kmap_atomic(p, km);
        *token = kaddr;
        *map = kaddr + offset;
        *map_len = PAGE_CACHE_SIZE - offset;
        return 0;
 }
+EXPORT_SYMBOL(map_private_extent_buffer);
+
+int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
+                     unsigned long min_len,
+                     char **token, char **map,
+                     unsigned long *map_start,
+                     unsigned long *map_len, int km)
+{
+       int err;
+       int save = 0;
+       if (eb->map_token) {
+               unmap_extent_buffer(eb, eb->map_token, km);
+               eb->map_token = NULL;
+               save = 1;
+       }
+       err = map_private_extent_buffer(eb, start, min_len, token, map,
+                                      map_start, map_len, km);
+       if (!err && save) {
+               eb->map_token = *token;
+               eb->kaddr = *map;
+               eb->map_start = *map_start;
+               eb->map_len = *map_len;
+       }
+       return err;
+}
 EXPORT_SYMBOL(map_extent_buffer);
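
[map_extent_buffer() now caches one long-lived mapping per buffer (map_token, kaddr, map_start and map_len stored in the extent_buffer), tearing down the previous cached mapping before creating the next, while map_private_extent_buffer() is the stateless variant for callers that manage their own token. A usage sketch (offset and min_len are placeholders):

	char *token, *map;
	unsigned long map_start, map_len;
	int err;

	/* maps the page containing [offset, offset + min_len); fails with
	 * -EINVAL if the range would straddle a page boundary */
	err = map_private_extent_buffer(eb, offset, min_len, &token, &map,
					&map_start, &map_len, KM_USER1);
	if (!err) {
		/* buffer bytes [map_start, map_start + map_len) are now
		 * addressable through 'map' */
		unmap_extent_buffer(eb, token, KM_USER1);
	}

]
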
 
 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
@@ -2322,9 +2575,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
@@ -2361,18 +2612,16 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = kmap_atomic(page, KM_USER1);
                memcpy(kaddr + offset, src, cur);
-               kunmap_atomic(kaddr, KM_USER0);
+               kunmap_atomic(kaddr, KM_USER1);
 
                src += cur;
                len -= cur;
@@ -2395,9 +2644,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(eb, i);
@@ -2429,9 +2676,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 
        WARN_ON(src->len != dst_len);
 
-       offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1);
-       if (i == 0)
-               offset += start_offset;
+       offset = (start_offset + dst_offset) &
+               ((unsigned long)PAGE_CACHE_SIZE - 1);
 
        while(len > 0) {
                page = extent_buffer_page(dst, i);
@@ -2439,9 +2685,9 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 
                cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
 
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = kmap_atomic(page, KM_USER0);
                read_extent_buffer(src, kaddr + offset, src_offset, cur);
-               kunmap_atomic(kaddr, KM_USER1);
+               kunmap_atomic(kaddr, KM_USER0);
 
                src_offset += cur;
                len -= cur;
@@ -2511,23 +2757,18 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        }
 
        while(len > 0) {
-               dst_off_in_page = dst_offset &
+               dst_off_in_page = (start_offset + dst_offset) &
                        ((unsigned long)PAGE_CACHE_SIZE - 1);
-               src_off_in_page = src_offset &
+               src_off_in_page = (start_offset + src_offset) &
                        ((unsigned long)PAGE_CACHE_SIZE - 1);
 
                dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
                src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
 
-               if (src_i == 0)
-                       src_off_in_page += start_offset;
-               if (dst_i == 0)
-                       dst_off_in_page += start_offset;
-
                cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
                                               src_off_in_page));
-               cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE -
-                                              dst_off_in_page));
+               cur = min_t(unsigned long, cur,
+                       (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
 
                copy_pages(extent_buffer_page(dst, dst_i),
                           extent_buffer_page(dst, src_i),
@@ -2570,26 +2811,20 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
                dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
                src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
 
-               dst_off_in_page = dst_end &
+               dst_off_in_page = (start_offset + dst_end) &
                        ((unsigned long)PAGE_CACHE_SIZE - 1);
-               src_off_in_page = src_end &
+               src_off_in_page = (start_offset + src_end) &
                        ((unsigned long)PAGE_CACHE_SIZE - 1);
 
-               if (src_i == 0)
-                       src_off_in_page += start_offset;
-               if (dst_i == 0)
-                       dst_off_in_page += start_offset;
-
-               cur = min(len, src_off_in_page + 1);
+               cur = min_t(unsigned long, len, src_off_in_page + 1);
                cur = min(cur, dst_off_in_page + 1);
-
                move_pages(extent_buffer_page(dst, dst_i),
                           extent_buffer_page(dst, src_i),
                           dst_off_in_page - cur + 1,
                           src_off_in_page - cur + 1, cur);
 
-               dst_end -= cur - 1;
-               src_end -= cur - 1;
+               dst_end -= cur;
+               src_end -= cur;
                len -= cur;
        }
 }