]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge remote-tracking branch 'f2fs/dev'
authorStephen Rothwell <sfr@canb.auug.org.au>
Thu, 11 Feb 2016 00:05:51 +0000 (11:05 +1100)
committerStephen Rothwell <sfr@canb.auug.org.au>
Thu, 11 Feb 2016 00:05:51 +0000 (11:05 +1100)
18 files changed:
Documentation/ABI/testing/sysfs-fs-f2fs
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/dir.c
fs/f2fs/extent_cache.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/node.c
fs/f2fs/node.h
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/xattr.c
include/linux/f2fs_fs.h

index e5200f354abfe933d3fbe69f919da2dcc0e585a6..a809f6005f1464ed7c86133db7b4b95d4a3c7388 100644 (file)
@@ -98,3 +98,17 @@ Date:                October 2015
 Contact:       "Chao Yu" <chao2.yu@samsung.com>
 Description:
                 Controls the count of nid pages to be readaheaded.
+
+What:          /sys/fs/f2fs/<disk>/dirty_nats_ratio
+Date:          January 2016
+Contact:       "Chao Yu" <chao2.yu@samsung.com>
+Description:
+                Controls dirty nat entries ratio threshold, if current
+                ratio exceeds configured threshold, checkpoint will
+                be triggered for flushing dirty nat entries.
+
+What:          /sys/fs/f2fs/<disk>/lifetime_write_kbytes
+Date:          January 2016
+Contact:       "Shuoran Liu" <liushuoran@huawei.com>
+Description:
+                Shows total written kbytes issued to disk.
index 3842af954cd5bc127f2f0aad13d0ff52b743779d..536bec99bd64d39570edd0f08ac5545e79ade5a2 100644 (file)
@@ -39,7 +39,7 @@ repeat:
                cond_resched();
                goto repeat;
        }
-       f2fs_wait_on_page_writeback(page, META);
+       f2fs_wait_on_page_writeback(page, META, true);
        SetPageUptodate(page);
        return page;
 }
@@ -232,13 +232,17 @@ static int f2fs_write_meta_page(struct page *page,
        if (unlikely(f2fs_cp_error(sbi)))
                goto redirty_out;
 
-       f2fs_wait_on_page_writeback(page, META);
        write_meta_page(sbi, page);
        dec_page_count(sbi, F2FS_DIRTY_META);
+
+       if (wbc->for_reclaim)
+               f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
+
        unlock_page(page);
 
-       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
+       if (unlikely(f2fs_cp_error(sbi)))
                f2fs_submit_merged_bio(sbi, META, WRITE);
+
        return 0;
 
 redirty_out:
@@ -252,13 +256,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
        struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
        long diff, written;
 
-       trace_f2fs_writepages(mapping->host, wbc, META);
-
        /* collect a number of dirty meta pages and write together */
        if (wbc->for_kupdate ||
                get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
                goto skip_write;
 
+       trace_f2fs_writepages(mapping->host, wbc, META);
+
        /* if mounting is failed, skip writing node pages */
        mutex_lock(&sbi->cp_mutex);
        diff = nr_pages_to_write(sbi, META, wbc);
@@ -269,6 +273,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
 
 skip_write:
        wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
+       trace_f2fs_writepages(mapping->host, wbc, META);
        return 0;
 }
 
@@ -315,6 +320,9 @@ continue_unlock:
                                goto continue_unlock;
                        }
 
+                       f2fs_wait_on_page_writeback(page, META, true);
+
+                       BUG_ON(PageWriteback(page));
                        if (!clear_page_dirty_for_io(page))
                                goto continue_unlock;
 
@@ -921,6 +929,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        int cp_payload_blks = __cp_payload(sbi);
        block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
        bool invalidate = false;
+       struct super_block *sb = sbi->sb;
+       struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
+       u64 kbytes_written;
 
        /*
         * This avoids to conduct wrong roll-forward operations and uses
@@ -1034,6 +1045,14 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
        write_data_summaries(sbi, start_blk);
        start_blk += data_sum_blocks;
+
+       /* Record write statistics in the hot node summary */
+       kbytes_written = sbi->kbytes_written;
+       if (sb->s_bdev->bd_part)
+               kbytes_written += BD_PART_WRITTEN(sbi);
+
+       seg_i->sum_blk->info.kbytes_written = cpu_to_le64(kbytes_written);
+
        if (__remain_node_summaries(cpc->reason)) {
                write_node_summaries(sbi, start_blk);
                start_blk += NR_CURSEG_NODE_TYPE;
index 5c06db17e41fa267f5b270061d2959b2a36803e4..03f948e84115df85ea68faadb856c7c3eef3383c 100644 (file)
@@ -67,7 +67,6 @@ static void f2fs_write_end_io(struct bio *bio)
                f2fs_restore_and_release_control_page(&page);
 
                if (unlikely(bio->bi_error)) {
-                       set_page_dirty(page);
                        set_bit(AS_EIO, &page->mapping->flags);
                        f2fs_stop_checkpoint(sbi);
                }
@@ -75,8 +74,7 @@ static void f2fs_write_end_io(struct bio *bio)
                dec_page_count(sbi, F2FS_WRITEBACK);
        }
 
-       if (!get_pages(sbi, F2FS_WRITEBACK) &&
-                       !list_empty(&sbi->cp_wait.task_list))
+       if (!get_pages(sbi, F2FS_WRITEBACK) && wq_has_sleeper(&sbi->cp_wait))
                wake_up(&sbi->cp_wait);
 
        bio_put(bio);
@@ -116,8 +114,60 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
        io->bio = NULL;
 }
 
-void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
-                               enum page_type type, int rw)
+static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
+                                               struct page *page, nid_t ino)
+{
+       struct bio_vec *bvec;
+       struct page *target;
+       int i;
+
+       if (!io->bio)
+               return false;
+
+       if (!inode && !page && !ino)
+               return true;
+
+       bio_for_each_segment_all(bvec, io->bio, i) {
+
+               if (bvec->bv_page->mapping) {
+                       target = bvec->bv_page;
+               } else {
+                       struct f2fs_crypto_ctx *ctx;
+
+                       /* encrypted page */
+                       ctx = (struct f2fs_crypto_ctx *)page_private(
+                                                               bvec->bv_page);
+                       target = ctx->w.control_page;
+               }
+
+               if (inode && inode == target->mapping->host)
+                       return true;
+               if (page && page == target)
+                       return true;
+               if (ino && ino == ino_of_node(target))
+                       return true;
+       }
+
+       return false;
+}
+
+static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
+                                               struct page *page, nid_t ino,
+                                               enum page_type type)
+{
+       enum page_type btype = PAGE_TYPE_OF_BIO(type);
+       struct f2fs_bio_info *io = &sbi->write_io[btype];
+       bool ret;
+
+       down_read(&io->io_rwsem);
+       ret = __has_merged_page(io, inode, page, ino);
+       up_read(&io->io_rwsem);
+       return ret;
+}
+
+static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
+                               struct inode *inode, struct page *page,
+                               nid_t ino, enum page_type type, int rw)
 {
        enum page_type btype = PAGE_TYPE_OF_BIO(type);
        struct f2fs_bio_info *io;
@@ -126,6 +176,9 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
 
        down_write(&io->io_rwsem);
 
+       if (!__has_merged_page(io, inode, page, ino))
+               goto out;
+
        /* change META to META_FLUSH in the checkpoint procedure */
        if (type >= META_FLUSH) {
                io->fio.type = META_FLUSH;
@@ -135,9 +188,24 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
                        io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
        }
        __submit_merged_bio(io);
+out:
        up_write(&io->io_rwsem);
 }
 
+void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
+                                                                       int rw)
+{
+       __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw);
+}
+
+void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+                               struct inode *inode, struct page *page,
+                               nid_t ino, enum page_type type, int rw)
+{
+       if (has_merged_page(sbi, inode, page, ino, type))
+               __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw);
+}
+
 /*
  * Fill the locked page with data located in the block address.
  * Return unlocked page.
@@ -218,7 +286,7 @@ void set_data_blkaddr(struct dnode_of_data *dn)
        struct page *node_page = dn->node_page;
        unsigned int ofs_in_node = dn->ofs_in_node;
 
-       f2fs_wait_on_page_writeback(node_page, NODE);
+       f2fs_wait_on_page_writeback(node_page, NODE, true);
 
        rn = F2FS_NODE(node_page);
 
@@ -461,7 +529,6 @@ got_it:
 static int __allocate_data_block(struct dnode_of_data *dn)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
-       struct f2fs_inode_info *fi = F2FS_I(dn->inode);
        struct f2fs_summary sum;
        struct node_info ni;
        int seg = CURSEG_WARM_DATA;
@@ -489,7 +556,7 @@ alloc:
        set_data_blkaddr(dn);
 
        /* update i_size */
-       fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+       fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
                                                        dn->ofs_in_node;
        if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
                i_size_write(dn->inode,
@@ -497,67 +564,33 @@ alloc:
        return 0;
 }
 
-static int __allocate_data_blocks(struct inode *inode, loff_t offset,
-                                                       size_t count)
+ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 {
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct dnode_of_data dn;
-       u64 start = F2FS_BYTES_TO_BLK(offset);
-       u64 len = F2FS_BYTES_TO_BLK(count);
-       bool allocated;
-       u64 end_offset;
-       int err = 0;
-
-       while (len) {
-               f2fs_lock_op(sbi);
-
-               /* When reading holes, we need its node page */
-               set_new_dnode(&dn, inode, NULL, NULL, 0);
-               err = get_dnode_of_data(&dn, start, ALLOC_NODE);
-               if (err)
-                       goto out;
-
-               allocated = false;
-               end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
-
-               while (dn.ofs_in_node < end_offset && len) {
-                       block_t blkaddr;
-
-                       if (unlikely(f2fs_cp_error(sbi))) {
-                               err = -EIO;
-                               goto sync_out;
-                       }
-
-                       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
-                       if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
-                               err = __allocate_data_block(&dn);
-                               if (err)
-                                       goto sync_out;
-                               allocated = true;
-                       }
-                       len--;
-                       start++;
-                       dn.ofs_in_node++;
-               }
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct f2fs_map_blocks map;
+       ssize_t ret = 0;
 
-               if (allocated)
-                       sync_inode_page(&dn);
+       map.m_lblk = F2FS_BYTES_TO_BLK(iocb->ki_pos);
+       map.m_len = F2FS_BLK_ALIGN(iov_iter_count(from));
+       map.m_next_pgofs = NULL;
 
-               f2fs_put_dnode(&dn);
-               f2fs_unlock_op(sbi);
+       if (f2fs_encrypted_inode(inode))
+               return 0;
 
-               f2fs_balance_fs(sbi, dn.node_changed);
+       if (iocb->ki_flags & IOCB_DIRECT) {
+               ret = f2fs_convert_inline_inode(inode);
+               if (ret)
+                       return ret;
+               return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
        }
-       return err;
-
-sync_out:
-       if (allocated)
-               sync_inode_page(&dn);
-       f2fs_put_dnode(&dn);
-out:
-       f2fs_unlock_op(sbi);
-       f2fs_balance_fs(sbi, dn.node_changed);
-       return err;
+       if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
+               ret = f2fs_convert_inline_inode(inode);
+               if (ret)
+                       return ret;
+       }
+       if (!f2fs_has_inline_data(inode))
+               return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+       return ret;
 }
 
 /*
@@ -588,13 +621,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        /* it only supports block size == page size */
        pgofs = (pgoff_t)map->m_lblk;
 
-       if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+       if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
                map->m_pblk = ei.blk + pgofs - ei.fofs;
                map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
                map->m_flags = F2FS_MAP_MAPPED;
                goto out;
        }
 
+next_dnode:
        if (create)
                f2fs_lock_op(sbi);
 
@@ -602,120 +636,98 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        set_new_dnode(&dn, inode, NULL, NULL, 0);
        err = get_dnode_of_data(&dn, pgofs, mode);
        if (err) {
-               if (err == -ENOENT)
+               if (err == -ENOENT) {
                        err = 0;
+                       if (map->m_next_pgofs)
+                               *map->m_next_pgofs =
+                                       get_next_page_offset(&dn, pgofs);
+               }
                goto unlock_out;
        }
 
-       if (dn.data_blkaddr == NEW_ADDR || dn.data_blkaddr == NULL_ADDR) {
+       end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+
+next_block:
+       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+
+       if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
                if (create) {
                        if (unlikely(f2fs_cp_error(sbi))) {
                                err = -EIO;
-                               goto put_out;
+                               goto sync_out;
+                       }
+                       if (flag == F2FS_GET_BLOCK_PRE_AIO) {
+                               if (blkaddr == NULL_ADDR)
+                                       err = reserve_new_block(&dn);
+                       } else {
+                               err = __allocate_data_block(&dn);
                        }
-                       err = __allocate_data_block(&dn);
                        if (err)
-                               goto put_out;
+                               goto sync_out;
                        allocated = true;
                        map->m_flags = F2FS_MAP_NEW;
+                       blkaddr = dn.data_blkaddr;
                } else {
+                       if (flag == F2FS_GET_BLOCK_FIEMAP &&
+                                               blkaddr == NULL_ADDR) {
+                               if (map->m_next_pgofs)
+                                       *map->m_next_pgofs = pgofs + 1;
+                       }
                        if (flag != F2FS_GET_BLOCK_FIEMAP ||
-                                               dn.data_blkaddr != NEW_ADDR) {
+                                               blkaddr != NEW_ADDR) {
                                if (flag == F2FS_GET_BLOCK_BMAP)
                                        err = -ENOENT;
-                               goto put_out;
+                               goto sync_out;
                        }
-
-                       /*
-                        * preallocated unwritten block should be mapped
-                        * for fiemap.
-                        */
-                       if (dn.data_blkaddr == NEW_ADDR)
-                               map->m_flags = F2FS_MAP_UNWRITTEN;
                }
        }
 
-       map->m_flags |= F2FS_MAP_MAPPED;
-       map->m_pblk = dn.data_blkaddr;
-       map->m_len = 1;
+       if (map->m_len == 0) {
+               /* preallocated unwritten block should be mapped for fiemap. */
+               if (blkaddr == NEW_ADDR)
+                       map->m_flags |= F2FS_MAP_UNWRITTEN;
+               map->m_flags |= F2FS_MAP_MAPPED;
+
+               map->m_pblk = blkaddr;
+               map->m_len = 1;
+       } else if ((map->m_pblk != NEW_ADDR &&
+                       blkaddr == (map->m_pblk + ofs)) ||
+                       (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
+                       flag == F2FS_GET_BLOCK_PRE_DIO ||
+                       flag == F2FS_GET_BLOCK_PRE_AIO) {
+               ofs++;
+               map->m_len++;
+       } else {
+               goto sync_out;
+       }
 
-       end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
        dn.ofs_in_node++;
        pgofs++;
 
-get_next:
-       if (map->m_len >= maxblocks)
-               goto sync_out;
+       if (map->m_len < maxblocks) {
+               if (dn.ofs_in_node < end_offset)
+                       goto next_block;
 
-       if (dn.ofs_in_node >= end_offset) {
                if (allocated)
                        sync_inode_page(&dn);
-               allocated = false;
                f2fs_put_dnode(&dn);
 
                if (create) {
                        f2fs_unlock_op(sbi);
-                       f2fs_balance_fs(sbi, dn.node_changed);
-                       f2fs_lock_op(sbi);
-               }
-
-               set_new_dnode(&dn, inode, NULL, NULL, 0);
-               err = get_dnode_of_data(&dn, pgofs, mode);
-               if (err) {
-                       if (err == -ENOENT)
-                               err = 0;
-                       goto unlock_out;
+                       f2fs_balance_fs(sbi, allocated);
                }
-
-               end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
-       }
-
-       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
-
-       if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
-               if (create) {
-                       if (unlikely(f2fs_cp_error(sbi))) {
-                               err = -EIO;
-                               goto sync_out;
-                       }
-                       err = __allocate_data_block(&dn);
-                       if (err)
-                               goto sync_out;
-                       allocated = true;
-                       map->m_flags |= F2FS_MAP_NEW;
-                       blkaddr = dn.data_blkaddr;
-               } else {
-                       /*
-                        * we only merge preallocated unwritten blocks
-                        * for fiemap.
-                        */
-                       if (flag != F2FS_GET_BLOCK_FIEMAP ||
-                                       blkaddr != NEW_ADDR)
-                               goto sync_out;
-               }
-       }
-
-       /* Give more consecutive addresses for the readahead */
-       if ((map->m_pblk != NEW_ADDR &&
-                       blkaddr == (map->m_pblk + ofs)) ||
-                       (map->m_pblk == NEW_ADDR &&
-                       blkaddr == NEW_ADDR)) {
-               ofs++;
-               dn.ofs_in_node++;
-               pgofs++;
-               map->m_len++;
-               goto get_next;
+               allocated = false;
+               goto next_dnode;
        }
 
 sync_out:
        if (allocated)
                sync_inode_page(&dn);
-put_out:
        f2fs_put_dnode(&dn);
 unlock_out:
        if (create) {
                f2fs_unlock_op(sbi);
-               f2fs_balance_fs(sbi, dn.node_changed);
+               f2fs_balance_fs(sbi, allocated);
        }
 out:
        trace_f2fs_map_blocks(inode, map, err);
@@ -723,13 +735,15 @@ out:
 }
 
 static int __get_data_block(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh, int create, int flag)
+                       struct buffer_head *bh, int create, int flag,
+                       pgoff_t *next_pgofs)
 {
        struct f2fs_map_blocks map;
        int ret;
 
        map.m_lblk = iblock;
        map.m_len = bh->b_size >> inode->i_blkbits;
+       map.m_next_pgofs = next_pgofs;
 
        ret = f2fs_map_blocks(inode, &map, create, flag);
        if (!ret) {
@@ -741,16 +755,18 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 }
 
 static int get_data_block(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh_result, int create, int flag)
+                       struct buffer_head *bh_result, int create, int flag,
+                       pgoff_t *next_pgofs)
 {
-       return __get_data_block(inode, iblock, bh_result, create, flag);
+       return __get_data_block(inode, iblock, bh_result, create,
+                                                       flag, next_pgofs);
 }
 
 static int get_data_block_dio(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create)
 {
        return __get_data_block(inode, iblock, bh_result, create,
-                                               F2FS_GET_BLOCK_DIO);
+                                               F2FS_GET_BLOCK_DIO, NULL);
 }
 
 static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -761,7 +777,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock,
                return -EFBIG;
 
        return __get_data_block(inode, iblock, bh_result, create,
-                                               F2FS_GET_BLOCK_BMAP);
+                                               F2FS_GET_BLOCK_BMAP, NULL);
 }
 
 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -779,6 +795,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 {
        struct buffer_head map_bh;
        sector_t start_blk, last_blk;
+       pgoff_t next_pgofs;
        loff_t isize;
        u64 logical = 0, phys = 0, size = 0;
        u32 flags = 0;
@@ -814,14 +831,15 @@ next:
        map_bh.b_size = len;
 
        ret = get_data_block(inode, start_blk, &map_bh, 0,
-                                       F2FS_GET_BLOCK_FIEMAP);
+                                       F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
        if (ret)
                goto out;
 
        /* HOLE */
        if (!buffer_mapped(&map_bh)) {
+               start_blk = next_pgofs;
                /* Go through holes util pass the EOF */
-               if (blk_to_logical(inode, start_blk++) < isize)
+               if (blk_to_logical(inode, start_blk) < isize)
                        goto prep_next;
                /* Found a hole beyond isize means no more extents.
                 * Note that the premise is that filesystems don't
@@ -889,6 +907,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
        map.m_lblk = 0;
        map.m_len = 0;
        map.m_flags = 0;
+       map.m_next_pgofs = NULL;
 
        for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
 
@@ -927,7 +946,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
                        map.m_len = last_block - block_in_file;
 
                        if (f2fs_map_blocks(inode, &map, 0,
-                                                       F2FS_GET_BLOCK_READ))
+                                               F2FS_GET_BLOCK_READ))
                                goto set_error_page;
                }
 got_it:
@@ -1177,12 +1196,18 @@ out:
        inode_dec_dirty_pages(inode);
        if (err)
                ClearPageUptodate(page);
+
+       if (wbc->for_reclaim) {
+               f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE);
+               remove_dirty_inode(inode);
+       }
+
        unlock_page(page);
        f2fs_balance_fs(sbi, need_balance_fs);
-       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
+
+       if (unlikely(f2fs_cp_error(sbi)))
                f2fs_submit_merged_bio(sbi, DATA, WRITE);
-               remove_dirty_inode(inode);
-       }
+
        return 0;
 
 redirty_out:
@@ -1282,7 +1307,8 @@ continue_unlock:
 
                        if (PageWriteback(page)) {
                                if (wbc->sync_mode != WB_SYNC_NONE)
-                                       f2fs_wait_on_page_writeback(page, DATA);
+                                       f2fs_wait_on_page_writeback(page,
+                                                               DATA, true);
                                else
                                        goto continue_unlock;
                        }
@@ -1339,8 +1365,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
        int ret;
        long diff;
 
-       trace_f2fs_writepages(mapping->host, wbc, DATA);
-
        /* deal with chardevs and other special file */
        if (!mapping->a_ops->writepage)
                return 0;
@@ -1362,14 +1386,16 @@ static int f2fs_write_data_pages(struct address_space *mapping,
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto skip_write;
 
+       trace_f2fs_writepages(mapping->host, wbc, DATA);
+
        diff = nr_pages_to_write(sbi, DATA, wbc);
 
-       if (!S_ISDIR(inode->i_mode)) {
+       if (!S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_ALL) {
                mutex_lock(&sbi->writepages);
                locked = true;
        }
        ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
-       f2fs_submit_merged_bio(sbi, DATA, WRITE);
+       f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
        if (locked)
                mutex_unlock(&sbi->writepages);
 
@@ -1380,6 +1406,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 
 skip_write:
        wbc->pages_skipped += get_dirty_pages(inode);
+       trace_f2fs_writepages(mapping->host, wbc, DATA);
        return 0;
 }
 
@@ -1406,6 +1433,14 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
        struct extent_info ei;
        int err = 0;
 
+       /*
+        * we already allocated all the blocks, so we don't need to get
+        * the block addresses when there is no need to fill the page.
+        */
+       if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) &&
+                                       len == PAGE_CACHE_SIZE)
+               return 0;
+
        if (f2fs_has_inline_data(inode) ||
                        (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
                f2fs_lock_op(sbi);
@@ -1425,7 +1460,7 @@ restart:
                if (pos + len <= MAX_INLINE_DATA) {
                        read_inline_data(page, ipage);
                        set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
-                       sync_inode_page(&dn);
+                       set_inline_node(ipage);
                } else {
                        err = f2fs_convert_inline_page(&dn, page);
                        if (err)
@@ -1439,13 +1474,9 @@ restart:
                if (f2fs_lookup_extent_cache(inode, index, &ei)) {
                        dn.data_blkaddr = ei.blk + index - ei.fofs;
                } else {
-                       bool restart = false;
-
                        /* hole case */
                        err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
-                       if (err || (!err && dn.data_blkaddr == NULL_ADDR))
-                               restart = true;
-                       if (restart) {
+                       if (err || (!err && dn.data_blkaddr == NULL_ADDR)) {
                                f2fs_put_dnode(&dn);
                                f2fs_lock_op(sbi);
                                locked = true;
@@ -1514,7 +1545,7 @@ repeat:
                }
        }
 
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, false);
 
        /* wait for GCed encrypted page writeback */
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
@@ -1592,7 +1623,6 @@ static int f2fs_write_end(struct file *file,
        if (pos + copied > i_size_read(inode)) {
                i_size_write(inode, pos + copied);
                mark_inode_dirty(inode);
-               update_inode_page(inode);
        }
 
        f2fs_put_page(page, 1);
@@ -1617,34 +1647,21 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
 static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                              loff_t offset)
 {
-       struct file *file = iocb->ki_filp;
-       struct address_space *mapping = file->f_mapping;
+       struct address_space *mapping = iocb->ki_filp->f_mapping;
        struct inode *inode = mapping->host;
        size_t count = iov_iter_count(iter);
        int err;
 
-       /* we don't need to use inline_data strictly */
-       err = f2fs_convert_inline_inode(inode);
+       err = check_direct_IO(inode, iter, offset);
        if (err)
                return err;
 
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                return 0;
 
-       err = check_direct_IO(inode, iter, offset);
-       if (err)
-               return err;
-
        trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 
-       if (iov_iter_rw(iter) == WRITE) {
-               err = __allocate_data_blocks(inode, offset, count);
-               if (err)
-                       goto out;
-       }
-
        err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
-out:
        if (err < 0 && iov_iter_rw(iter) == WRITE)
                f2fs_write_failed(mapping, offset + count);
 
index faa7495e2d7e62effef70b681c52ba2c75ca36d9..8950fc3cc2f7578520bfb340e80c7269b3dd8064 100644 (file)
@@ -296,7 +296,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
 {
        enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
        lock_page(page);
-       f2fs_wait_on_page_writeback(page, type);
+       f2fs_wait_on_page_writeback(page, type, true);
        de->ino = cpu_to_le32(inode->i_ino);
        set_de_type(de, inode->i_mode);
        f2fs_dentry_kunmap(dir, page);
@@ -311,7 +311,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
 {
        struct f2fs_inode *ri;
 
-       f2fs_wait_on_page_writeback(ipage, NODE);
+       f2fs_wait_on_page_writeback(ipage, NODE, true);
 
        /* copy name info. to this inode page */
        ri = F2FS_INODE(ipage);
@@ -598,7 +598,7 @@ start:
        ++level;
        goto start;
 add_dentry:
-       f2fs_wait_on_page_writeback(dentry_page, DATA);
+       f2fs_wait_on_page_writeback(dentry_page, DATA, true);
 
        if (inode) {
                down_write(&F2FS_I(inode)->i_sem);
@@ -709,7 +709,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
                return f2fs_delete_inline_entry(dentry, page, dir, inode);
 
        lock_page(page);
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
 
        dentry_blk = page_address(page);
        bit_pos = dentry - dentry_blk->dentry;
index ccd5c636d3fe026d0c5379d4062ee204fc7faa17..071a1b19e5afb793e8eeea924af9ce32eca30a76 100644 (file)
@@ -33,6 +33,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 
        en->ei = *ei;
        INIT_LIST_HEAD(&en->list);
+       en->et = et;
 
        rb_link_node(&en->rb_node, parent, p);
        rb_insert_color(&en->rb_node, &et->root);
@@ -50,6 +51,24 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
 
        if (et->cached_en == en)
                et->cached_en = NULL;
+       kmem_cache_free(extent_node_slab, en);
+}
+
+/*
+ * Flow to release an extent_node:
+ * 1. list_del_init
+ * 2. __detach_extent_node
+ * 3. kmem_cache_free.
+ */
+static void __release_extent_node(struct f2fs_sb_info *sbi,
+                       struct extent_tree *et, struct extent_node *en)
+{
+       spin_lock(&sbi->extent_lock);
+       f2fs_bug_on(sbi, list_empty(&en->list));
+       list_del_init(&en->list);
+       spin_unlock(&sbi->extent_lock);
+
+       __detach_extent_node(sbi, et, en);
 }
 
 static struct extent_tree *__grab_extent_tree(struct inode *inode)
@@ -129,7 +148,7 @@ static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
 }
 
 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
-                                       struct extent_tree *et, bool free_all)
+                                       struct extent_tree *et)
 {
        struct rb_node *node, *next;
        struct extent_node *en;
@@ -139,18 +158,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
        while (node) {
                next = rb_next(node);
                en = rb_entry(node, struct extent_node, rb_node);
-
-               if (free_all) {
-                       spin_lock(&sbi->extent_lock);
-                       if (!list_empty(&en->list))
-                               list_del_init(&en->list);
-                       spin_unlock(&sbi->extent_lock);
-               }
-
-               if (free_all || list_empty(&en->list)) {
-                       __detach_extent_node(sbi, et, en);
-                       kmem_cache_free(extent_node_slab, en);
-               }
+               __release_extent_node(sbi, et, en);
                node = next;
        }
 
@@ -232,9 +240,10 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
        if (en) {
                *ei = en->ei;
                spin_lock(&sbi->extent_lock);
-               if (!list_empty(&en->list))
+               if (!list_empty(&en->list)) {
                        list_move_tail(&en->list, &sbi->extent_list);
-               et->cached_en = en;
+                       et->cached_en = en;
+               }
                spin_unlock(&sbi->extent_lock);
                ret = true;
        }
@@ -329,7 +338,6 @@ lookup_neighbors:
 
 static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
                                struct extent_tree *et, struct extent_info *ei,
-                               struct extent_node **den,
                                struct extent_node *prev_ex,
                                struct extent_node *next_ex)
 {
@@ -342,20 +350,25 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
        }
 
        if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
-               if (en) {
-                       __detach_extent_node(sbi, et, prev_ex);
-                       *den = prev_ex;
-               }
+               if (en)
+                       __release_extent_node(sbi, et, prev_ex);
                next_ex->ei.fofs = ei->fofs;
                next_ex->ei.blk = ei->blk;
                next_ex->ei.len += ei->len;
                en = next_ex;
        }
 
-       if (en) {
-               __try_update_largest_extent(et, en);
+       if (!en)
+               return NULL;
+
+       __try_update_largest_extent(et, en);
+
+       spin_lock(&sbi->extent_lock);
+       if (!list_empty(&en->list)) {
+               list_move_tail(&en->list, &sbi->extent_list);
                et->cached_en = en;
        }
+       spin_unlock(&sbi->extent_lock);
        return en;
 }
 
@@ -391,7 +404,12 @@ do_insert:
                return NULL;
 
        __try_update_largest_extent(et, en);
+
+       /* update in global extent list */
+       spin_lock(&sbi->extent_lock);
+       list_add_tail(&en->list, &sbi->extent_list);
        et->cached_en = en;
+       spin_unlock(&sbi->extent_lock);
        return en;
 }
 
@@ -479,7 +497,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
                if (parts)
                        __try_update_largest_extent(et, en);
                else
-                       __detach_extent_node(sbi, et, en);
+                       __release_extent_node(sbi, et, en);
 
                /*
                 * if original extent is split into zero or two parts, extent
@@ -490,31 +508,15 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
                        insert_p = NULL;
                        insert_parent = NULL;
                }
-
-               /* update in global extent list */
-               spin_lock(&sbi->extent_lock);
-               if (!parts && !list_empty(&en->list))
-                       list_del(&en->list);
-               if (en1)
-                       list_add_tail(&en1->list, &sbi->extent_list);
-               spin_unlock(&sbi->extent_lock);
-
-               /* release extent node */
-               if (!parts)
-                       kmem_cache_free(extent_node_slab, en);
-
                en = next_en;
        }
 
        /* 3. update extent in extent cache */
        if (blkaddr) {
-               struct extent_node *den = NULL;
 
                set_extent_info(&ei, fofs, blkaddr, len);
-               en1 = __try_merge_extent_node(sbi, et, &ei, &den,
-                                                       prev_en, next_en);
-               if (!en1)
-                       en1 = __insert_extent_tree(sbi, et, &ei,
+               if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
+                       __insert_extent_tree(sbi, et, &ei,
                                                insert_p, insert_parent);
 
                /* give up extent_cache, if split and small updates happen */
@@ -524,24 +526,10 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
                        et->largest.len = 0;
                        set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
                }
-
-               spin_lock(&sbi->extent_lock);
-               if (en1) {
-                       if (list_empty(&en1->list))
-                               list_add_tail(&en1->list, &sbi->extent_list);
-                       else
-                               list_move_tail(&en1->list, &sbi->extent_list);
-               }
-               if (den && !list_empty(&den->list))
-                       list_del(&den->list);
-               spin_unlock(&sbi->extent_lock);
-
-               if (den)
-                       kmem_cache_free(extent_node_slab, den);
        }
 
        if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
-               __free_extent_tree(sbi, et, true);
+               __free_extent_tree(sbi, et);
 
        write_unlock(&et->lock);
 
@@ -550,14 +538,10 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
 
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 {
-       struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
        struct extent_tree *et, *next;
-       struct extent_node *en, *tmp;
-       unsigned long ino = F2FS_ROOT_INO(sbi);
-       unsigned int found;
+       struct extent_node *en;
        unsigned int node_cnt = 0, tree_cnt = 0;
        int remained;
-       bool do_free = false;
 
        if (!test_opt(sbi, EXTENT_CACHE))
                return 0;
@@ -572,10 +556,10 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
        list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
                if (atomic_read(&et->node_cnt)) {
                        write_lock(&et->lock);
-                       node_cnt += __free_extent_tree(sbi, et, true);
+                       node_cnt += __free_extent_tree(sbi, et);
                        write_unlock(&et->lock);
                }
-
+               f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
                list_del_init(&et->list);
                radix_tree_delete(&sbi->extent_tree_root, et->ino);
                kmem_cache_free(extent_tree_slab, et);
@@ -585,6 +569,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 
                if (node_cnt + tree_cnt >= nr_shrink)
                        goto unlock_out;
+               cond_resched();
        }
        up_write(&sbi->extent_tree_lock);
 
@@ -596,42 +581,29 @@ free_node:
        remained = nr_shrink - (node_cnt + tree_cnt);
 
        spin_lock(&sbi->extent_lock);
-       list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
-               if (!remained--)
+       for (; remained > 0; remained--) {
+               if (list_empty(&sbi->extent_list))
                        break;
-               list_del_init(&en->list);
-               do_free = true;
-       }
-       spin_unlock(&sbi->extent_lock);
-
-       if (do_free == false)
-               goto unlock_out;
-
-       /*
-        * reset ino for searching victims from beginning of global extent tree.
-        */
-       ino = F2FS_ROOT_INO(sbi);
-
-       while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
-                               (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
-               unsigned i;
-
-               ino = treevec[found - 1]->ino + 1;
-               for (i = 0; i < found; i++) {
-                       struct extent_tree *et = treevec[i];
+               en = list_first_entry(&sbi->extent_list,
+                                       struct extent_node, list);
+               et = en->et;
+               if (!write_trylock(&et->lock)) {
+                       /* refresh this extent node's position in extent list */
+                       list_move_tail(&en->list, &sbi->extent_list);
+                       continue;
+               }
 
-                       if (!atomic_read(&et->node_cnt))
-                               continue;
+               list_del_init(&en->list);
+               spin_unlock(&sbi->extent_lock);
 
-                       if (write_trylock(&et->lock)) {
-                               node_cnt += __free_extent_tree(sbi, et, false);
-                               write_unlock(&et->lock);
-                       }
+               __detach_extent_node(sbi, et, en);
 
-                       if (node_cnt + tree_cnt >= nr_shrink)
-                               goto unlock_out;
-               }
+               write_unlock(&et->lock);
+               node_cnt++;
+               spin_lock(&sbi->extent_lock);
        }
+       spin_unlock(&sbi->extent_lock);
+
 unlock_out:
        up_write(&sbi->extent_tree_lock);
 out:
@@ -650,7 +622,7 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
                return 0;
 
        write_lock(&et->lock);
-       node_cnt = __free_extent_tree(sbi, et, true);
+       node_cnt = __free_extent_tree(sbi, et);
        write_unlock(&et->lock);
 
        return node_cnt;
@@ -701,7 +673,6 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
 
 void f2fs_update_extent_cache(struct dnode_of_data *dn)
 {
-       struct f2fs_inode_info *fi = F2FS_I(dn->inode);
        pgoff_t fofs;
 
        if (!f2fs_may_extent_tree(dn->inode))
@@ -710,8 +681,8 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn)
        f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
 
 
-       fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
-                                                       dn->ofs_in_node;
+       fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
+                                                               dn->ofs_in_node;
 
        if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1))
                sync_inode_page(dn);
index ff79054c6cf6a5bfe254abaf203ae94acd44739b..f6a841b85d40a45bb720afa3a04b8e3a42373d90 100644 (file)
@@ -354,6 +354,7 @@ struct extent_node {
        struct rb_node rb_node;         /* rb node located in rb-tree */
        struct list_head list;          /* node in global extent list of sbi */
        struct extent_info ei;          /* extent info */
+       struct extent_tree *et;         /* extent tree pointer */
 };
 
 struct extent_tree {
@@ -382,6 +383,7 @@ struct f2fs_map_blocks {
        block_t m_lblk;
        unsigned int m_len;
        unsigned int m_flags;
+       pgoff_t *m_next_pgofs;          /* point next possible non-hole pgofs */
 };
 
 /* for flag in get_data_block */
@@ -389,6 +391,8 @@ struct f2fs_map_blocks {
 #define F2FS_GET_BLOCK_DIO             1
 #define F2FS_GET_BLOCK_FIEMAP          2
 #define F2FS_GET_BLOCK_BMAP            3
+#define F2FS_GET_BLOCK_PRE_DIO         4
+#define F2FS_GET_BLOCK_PRE_AIO         5
 
 /*
  * i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
@@ -515,6 +519,7 @@ struct f2fs_nm_info {
        nid_t next_scan_nid;            /* the next nid to be scanned */
        unsigned int ram_thresh;        /* control the memory footprint */
        unsigned int ra_nid_pages;      /* # of nid pages to be readaheaded */
+       unsigned int dirty_nats_ratio;  /* control dirty nats ratio threshold */
 
        /* NAT cache management */
        struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -549,6 +554,8 @@ struct dnode_of_data {
        unsigned int ofs_in_node;       /* data offset in the node page */
        bool inode_page_locked;         /* inode page is locked or not */
        bool node_changed;              /* is node block changed */
+       char cur_level;                 /* level of hole node page */
+       char max_level;                 /* level of current page located */
        block_t data_blkaddr;           /* block address of the node block */
 };
 
@@ -844,8 +851,19 @@ struct f2fs_sb_info {
        struct list_head s_list;
        struct mutex umount_mutex;
        unsigned int shrinker_run_no;
+
+       /* For write statistics */
+       u64 sectors_written_start;
+       u64 kbytes_written;
 };
 
+/* For write statistics. Suppose sector size is 512 bytes,
+ * and the return value is in kbytes. s is of struct f2fs_sb_info.
+ */
+#define BD_PART_WRITTEN(s)                                              \
+(((u64)part_stat_read(s->sb->s_bdev->bd_part, sectors[1]) -             \
+               s->sectors_written_start) >> 1)
+
 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
 {
        sbi->last_time[type] = jiffies;
@@ -1525,9 +1543,9 @@ static inline int f2fs_has_inline_xattr(struct inode *inode)
        return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
 }
 
-static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
+static inline unsigned int addrs_per_inode(struct inode *inode)
 {
-       if (f2fs_has_inline_xattr(&fi->vfs_inode))
+       if (f2fs_has_inline_xattr(inode))
                return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
        return DEF_ADDRS_PER_INODE;
 }
@@ -1681,10 +1699,10 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
         (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
 
 /* get offset of first page in next direct node */
-#define PGOFS_OF_NEXT_DNODE(pgofs, fi)                         \
-       ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) :  \
-       (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) /       \
-       ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+#define PGOFS_OF_NEXT_DNODE(pgofs, inode)                              \
+       ((pgofs < ADDRS_PER_INODE(inode)) ? ADDRS_PER_INODE(inode) :    \
+       (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) /    \
+       ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode))
 
 /*
  * file.c
@@ -1780,6 +1798,7 @@ int need_dentry_mark(struct f2fs_sb_info *, nid_t);
 bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
 bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
 void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
+pgoff_t get_next_page_offset(struct dnode_of_data *, pgoff_t);
 int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
 int truncate_inode_blocks(struct inode *, pgoff_t);
 int truncate_xattr_node(struct inode *, struct page *);
@@ -1836,7 +1855,7 @@ void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *,
                                block_t, block_t, unsigned char, bool);
 void allocate_data_block(struct f2fs_sb_info *, struct page *,
                block_t, block_t *, struct f2fs_summary *, int);
-void f2fs_wait_on_page_writeback(struct page *, enum page_type);
+void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
 void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t);
 void write_data_summaries(struct f2fs_sb_info *, block_t);
 void write_node_summaries(struct f2fs_sb_info *, block_t);
@@ -1881,11 +1900,14 @@ void destroy_checkpoint_caches(void);
  * data.c
  */
 void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
+void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
+                               struct page *, nid_t, enum page_type, int);
 int f2fs_submit_page_bio(struct f2fs_io_info *);
 void f2fs_submit_page_mbio(struct f2fs_io_info *);
 void set_data_blkaddr(struct dnode_of_data *);
 int reserve_new_block(struct dnode_of_data *);
 int f2fs_get_block(struct dnode_of_data *, pgoff_t);
+ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
 int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
 struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
 struct page *find_data_page(struct inode *, pgoff_t);
@@ -1902,7 +1924,7 @@ int f2fs_release_page(struct page *, gfp_t);
  */
 int start_gc_thread(struct f2fs_sb_info *);
 void stop_gc_thread(struct f2fs_sb_info *);
-block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
+block_t start_bidx_of_node(unsigned int, struct inode *);
 int f2fs_gc(struct f2fs_sb_info *, bool);
 void build_gc_manager(struct f2fs_sb_info *);
 
index ea272be62677004d29c8018691ea442c2c992bb1..a4362d4d714b09f838bf46cad48d6f7aa62de2fd 100644 (file)
@@ -86,7 +86,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
        trace_f2fs_vm_page_mkwrite(page, DATA);
 mapped:
        /* fill the page */
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, false);
 
        /* wait for GCed encrypted page writeback */
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
@@ -358,15 +358,14 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
                } else if (err == -ENOENT) {
                        /* direct node does not exists */
                        if (whence == SEEK_DATA) {
-                               pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
-                                                       F2FS_I(inode));
+                               pgofs = get_next_page_offset(&dn, pgofs);
                                continue;
                        } else {
                                goto found;
                        }
                }
 
-               end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+               end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
 
                /* find data/hole in dnode block */
                for (; dn.ofs_in_node < end_offset;
@@ -480,7 +479,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
                 * we will invalidate all blkaddr in the whole range.
                 */
                fofs = start_bidx_of_node(ofs_of_node(dn->node_page),
-                                               F2FS_I(dn->inode)) + ofs;
+                                                       dn->inode) + ofs;
                f2fs_update_extent_cache_range(dn, fofs, 0, len);
                dec_valid_block_count(sbi, dn->inode, nr_free);
                sync_inode_page(dn);
@@ -521,7 +520,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
        if (IS_ERR(page))
                return 0;
 truncate_out:
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
        zero_user(page, offset, PAGE_CACHE_SIZE - offset);
        if (!cache_only || !f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode))
                set_page_dirty(page);
@@ -568,7 +567,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
                goto out;
        }
 
-       count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+       count = ADDRS_PER_PAGE(dn.node_page, inode);
 
        count -= dn.ofs_in_node;
        f2fs_bug_on(sbi, count < 0);
@@ -743,7 +742,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
        if (IS_ERR(page))
                return PTR_ERR(page);
 
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
        zero_user(page, start, len);
        set_page_dirty(page);
        f2fs_put_page(page, 1);
@@ -768,7 +767,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
                        return err;
                }
 
-               end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+               end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
                count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
 
                f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
@@ -892,7 +891,7 @@ static int __exchange_data_block(struct inode *inode, pgoff_t src,
                psrc = get_lock_data_page(inode, src, true);
                if (IS_ERR(psrc))
                        return PTR_ERR(psrc);
-               pdst = get_new_data_page(inode, NULL, dst, false);
+               pdst = get_new_data_page(inode, NULL, dst, true);
                if (IS_ERR(pdst)) {
                        f2fs_put_page(psrc, 1);
                        return PTR_ERR(pdst);
@@ -1648,7 +1647,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
                                        struct f2fs_defragment *range)
 {
        struct inode *inode = file_inode(filp);
-       struct f2fs_map_blocks map;
+       struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
        struct extent_info ei;
        pgoff_t pg_start, pg_end;
        unsigned int blk_per_seg = sbi->blocks_per_seg;
@@ -1874,14 +1873,32 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-       struct inode *inode = file_inode(iocb->ki_filp);
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       ssize_t ret;
 
        if (f2fs_encrypted_inode(inode) &&
                                !f2fs_has_encryption_key(inode) &&
                                f2fs_get_encryption_info(inode))
                return -EACCES;
 
-       return generic_file_write_iter(iocb, from);
+       inode_lock(inode);
+       ret = generic_write_checks(iocb, from);
+       if (ret > 0) {
+               ret = f2fs_preallocate_blocks(iocb, from);
+               if (!ret)
+                       ret = __generic_file_write_iter(iocb, from);
+       }
+       inode_unlock(inode);
+
+       if (ret > 0) {
+               ssize_t err;
+
+               err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+               if (err < 0)
+                       ret = err;
+       }
+       return ret;
 }
 
 #ifdef CONFIG_COMPAT
index f610c2a9bdde9561d3be71ab4d674376db8ec401..47ade3542fbdd679471dfc9927f6d2782034c45f 100644 (file)
@@ -245,6 +245,18 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
                return get_cb_cost(sbi, segno);
 }
 
+static unsigned int count_bits(const unsigned long *addr,
+                               unsigned int offset, unsigned int len)
+{
+       unsigned int end = offset + len, sum = 0;
+
+       while (offset < end) {
+               if (test_bit(offset++, addr))
+                       ++sum;
+       }
+       return sum;
+}
+
 /*
  * This function is called from two paths.
  * One is garbage collection and the other is SSR segment selection.
@@ -260,7 +272,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
        struct victim_sel_policy p;
        unsigned int secno, max_cost;
        unsigned int last_segment = MAIN_SEGS(sbi);
-       int nsearched = 0;
+       unsigned int nsearched = 0;
 
        mutex_lock(&dirty_i->seglist_lock);
 
@@ -295,26 +307,31 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
                }
 
                p.offset = segno + p.ofs_unit;
-               if (p.ofs_unit > 1)
+               if (p.ofs_unit > 1) {
                        p.offset -= segno % p.ofs_unit;
+                       nsearched += count_bits(p.dirty_segmap,
+                                               p.offset - p.ofs_unit,
+                                               p.ofs_unit);
+               } else {
+                       nsearched++;
+               }
+
 
                secno = GET_SECNO(sbi, segno);
 
                if (sec_usage_check(sbi, secno))
-                       continue;
+                       goto next;
                if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
-                       continue;
+                       goto next;
 
                cost = get_gc_cost(sbi, segno, &p);
 
                if (p.min_cost > cost) {
                        p.min_segno = segno;
                        p.min_cost = cost;
-               } else if (unlikely(cost == max_cost)) {
-                       continue;
                }
-
-               if (nsearched++ >= p.max_search) {
+next:
+               if (nsearched >= p.max_search) {
                        sbi->last_victim[p.gc_mode] = segno;
                        break;
                }
@@ -399,7 +416,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
  * On validity, copy that node with cold status, otherwise (invalid node)
  * ignore that.
  */
-static int gc_node_segment(struct f2fs_sb_info *sbi,
+static void gc_node_segment(struct f2fs_sb_info *sbi,
                struct f2fs_summary *sum, unsigned int segno, int gc_type)
 {
        bool initial = true;
@@ -419,7 +436,7 @@ next_step:
 
                /* stop BG_GC if there is not enough free sections. */
                if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
-                       return 0;
+                       return;
 
                if (check_valid_map(sbi, segno, off) == 0)
                        continue;
@@ -446,7 +463,7 @@ next_step:
 
                /* set page dirty and write it */
                if (gc_type == FG_GC) {
-                       f2fs_wait_on_page_writeback(node_page, NODE);
+                       f2fs_wait_on_page_writeback(node_page, NODE, true);
                        set_page_dirty(node_page);
                } else {
                        if (!PageWriteback(node_page))
@@ -460,20 +477,6 @@ next_step:
                initial = false;
                goto next_step;
        }
-
-       if (gc_type == FG_GC) {
-               struct writeback_control wbc = {
-                       .sync_mode = WB_SYNC_ALL,
-                       .nr_to_write = LONG_MAX,
-                       .for_reclaim = 0,
-               };
-               sync_node_pages(sbi, 0, &wbc);
-
-               /* return 1 only if FG_GC succefully reclaimed one */
-               if (get_valid_blocks(sbi, segno, 1) == 0)
-                       return 1;
-       }
-       return 0;
 }
 
 /*
@@ -483,7 +486,7 @@ next_step:
  * as indirect or double indirect node blocks, are given, it must be a caller's
  * bug.
  */
-block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
+block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
 {
        unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
        unsigned int bidx;
@@ -500,7 +503,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
                int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
                bidx = node_ofs - 5 - dec;
        }
-       return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
+       return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
 }
 
 static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -567,7 +570,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
         * don't cache encrypted data into meta inode until previous dirty
         * data were writebacked to avoid racing between GC and flush.
         */
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
 
        get_node_info(fio.sbi, dn.nid, &ni);
        set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
@@ -596,14 +599,14 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
                goto put_page_out;
 
        set_page_dirty(fio.encrypted_page);
-       f2fs_wait_on_page_writeback(fio.encrypted_page, DATA);
+       f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
        if (clear_page_dirty_for_io(fio.encrypted_page))
                dec_page_count(fio.sbi, F2FS_DIRTY_META);
 
        set_page_writeback(fio.encrypted_page);
 
        /* allocate block address */
-       f2fs_wait_on_page_writeback(dn.node_page, NODE);
+       f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
        allocate_data_block(fio.sbi, NULL, fio.blk_addr,
                                        &fio.blk_addr, &sum, CURSEG_COLD_DATA);
        fio.rw = WRITE_SYNC;
@@ -645,7 +648,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
                        .encrypted_page = NULL,
                };
                set_page_dirty(page);
-               f2fs_wait_on_page_writeback(page, DATA);
+               f2fs_wait_on_page_writeback(page, DATA, true);
                if (clear_page_dirty_for_io(page))
                        inode_dec_dirty_pages(inode);
                set_cold_data(page);
@@ -663,7 +666,7 @@ out:
  * If the parent node is not valid or the data block address is different,
  * the victim data block is ignored.
  */
-static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
 {
        struct super_block *sb = sbi->sb;
@@ -686,7 +689,7 @@ next_step:
 
                /* stop BG_GC if there is not enough free sections. */
                if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
-                       return 0;
+                       return;
 
                if (check_valid_map(sbi, segno, off) == 0)
                        continue;
@@ -719,7 +722,7 @@ next_step:
                                continue;
                        }
 
-                       start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+                       start_bidx = start_bidx_of_node(nofs, inode);
                        data_page = get_read_data_page(inode,
                                        start_bidx + ofs_in_node, READA, true);
                        if (IS_ERR(data_page)) {
@@ -735,7 +738,7 @@ next_step:
                /* phase 3 */
                inode = find_gc_inode(gc_list, dni.ino);
                if (inode) {
-                       start_bidx = start_bidx_of_node(nofs, F2FS_I(inode))
+                       start_bidx = start_bidx_of_node(nofs, inode)
                                                                + ofs_in_node;
                        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                                move_encrypted_block(inode, start_bidx);
@@ -747,15 +750,6 @@ next_step:
 
        if (++phase < 4)
                goto next_step;
-
-       if (gc_type == FG_GC) {
-               f2fs_submit_merged_bio(sbi, DATA, WRITE);
-
-               /* return 1 only if FG_GC succefully reclaimed one */
-               if (get_valid_blocks(sbi, segno, 1) == 0)
-                       return 1;
-       }
-       return 0;
 }
 
 static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -771,53 +765,90 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
        return ret;
 }
 
-static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
+static int do_garbage_collect(struct f2fs_sb_info *sbi,
+                               unsigned int start_segno,
                                struct gc_inode_list *gc_list, int gc_type)
 {
        struct page *sum_page;
        struct f2fs_summary_block *sum;
        struct blk_plug plug;
-       int nfree = 0;
+       unsigned int segno = start_segno;
+       unsigned int end_segno = start_segno + sbi->segs_per_sec;
+       int seg_freed = 0;
+       unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
+                                               SUM_TYPE_DATA : SUM_TYPE_NODE;
 
-       /* read segment summary of victim */
-       sum_page = get_sum_page(sbi, segno);
+       /* readahead multi ssa blocks those have contiguous address */
+       if (sbi->segs_per_sec > 1)
+               ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
+                                       sbi->segs_per_sec, META_SSA, true);
+
+       /* reference all summary page */
+       while (segno < end_segno) {
+               sum_page = get_sum_page(sbi, segno++);
+               unlock_page(sum_page);
+       }
 
        blk_start_plug(&plug);
 
-       sum = page_address(sum_page);
+       for (segno = start_segno; segno < end_segno; segno++) {
+               /* find segment summary of victim */
+               sum_page = find_get_page(META_MAPPING(sbi),
+                                       GET_SUM_BLOCK(sbi, segno));
+               f2fs_bug_on(sbi, !PageUptodate(sum_page));
+               f2fs_put_page(sum_page, 0);
 
-       /*
-        * this is to avoid deadlock:
-        * - lock_page(sum_page)         - f2fs_replace_block
-        *  - check_valid_map()            - mutex_lock(sentry_lock)
-        *   - mutex_lock(sentry_lock)     - change_curseg()
-        *                                  - lock_page(sum_page)
-        */
-       unlock_page(sum_page);
-
-       switch (GET_SUM_TYPE((&sum->footer))) {
-       case SUM_TYPE_NODE:
-               nfree = gc_node_segment(sbi, sum->entries, segno, gc_type);
-               break;
-       case SUM_TYPE_DATA:
-               nfree = gc_data_segment(sbi, sum->entries, gc_list,
-                                                       segno, gc_type);
-               break;
+               sum = page_address(sum_page);
+               f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
+
+               /*
+                * this is to avoid deadlock:
+                * - lock_page(sum_page)         - f2fs_replace_block
+                *  - check_valid_map()            - mutex_lock(sentry_lock)
+                *   - mutex_lock(sentry_lock)     - change_curseg()
+                *                                  - lock_page(sum_page)
+                */
+
+               if (type == SUM_TYPE_NODE)
+                       gc_node_segment(sbi, sum->entries, segno, gc_type);
+               else
+                       gc_data_segment(sbi, sum->entries, gc_list, segno,
+                                                               gc_type);
+
+               stat_inc_seg_count(sbi, type, gc_type);
+               stat_inc_call_count(sbi->stat_info);
+
+               f2fs_put_page(sum_page, 0);
        }
-       blk_finish_plug(&plug);
 
-       stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
-       stat_inc_call_count(sbi->stat_info);
+       if (gc_type == FG_GC) {
+               if (type == SUM_TYPE_NODE) {
+                       struct writeback_control wbc = {
+                               .sync_mode = WB_SYNC_ALL,
+                               .nr_to_write = LONG_MAX,
+                               .for_reclaim = 0,
+                       };
+                       sync_node_pages(sbi, 0, &wbc);
+               } else {
+                       f2fs_submit_merged_bio(sbi, DATA, WRITE);
+               }
+       }
 
-       f2fs_put_page(sum_page, 0);
-       return nfree;
+       blk_finish_plug(&plug);
+
+       if (gc_type == FG_GC) {
+               while (start_segno < end_segno)
+                       if (get_valid_blocks(sbi, start_segno++, 1) == 0)
+                               seg_freed++;
+       }
+       return seg_freed;
 }
 
 int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
 {
-       unsigned int segno, i;
+       unsigned int segno;
        int gc_type = sync ? FG_GC : BG_GC;
-       int sec_freed = 0;
+       int sec_freed = 0, seg_freed;
        int ret = -EINVAL;
        struct cp_control cpc;
        struct gc_inode_list gc_list = {
@@ -838,30 +869,24 @@ gc_more:
 
        if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
                gc_type = FG_GC;
+               /*
+                * If there is no victim and no prefree segment but still not
+                * enough free sections, we should flush dent/node blocks and do
+                * garbage collections.
+                */
                if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
                        write_checkpoint(sbi, &cpc);
+               else if (has_not_enough_free_secs(sbi, 0))
+                       write_checkpoint(sbi, &cpc);
        }
 
        if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
                goto stop;
        ret = 0;
 
-       /* readahead multi ssa blocks those have contiguous address */
-       if (sbi->segs_per_sec > 1)
-               ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
-                                                       META_SSA, true);
-
-       for (i = 0; i < sbi->segs_per_sec; i++) {
-               /*
-                * for FG_GC case, halt gcing left segments once failed one
-                * of segments in selected section to avoid long latency.
-                */
-               if (!do_garbage_collect(sbi, segno + i, &gc_list, gc_type) &&
-                               gc_type == FG_GC)
-                       break;
-       }
+       seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
 
-       if (i == sbi->segs_per_sec && gc_type == FG_GC)
+       if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
                sec_freed++;
 
        if (gc_type == FG_GC)
index c3f0b7d4cfca174bba6e6b4baa796640b767d2ec..0be4a9b400c63db0fecd285b3ad21fe665cb5f7f 100644 (file)
@@ -71,7 +71,7 @@ bool truncate_inline_inode(struct page *ipage, u64 from)
 
        addr = inline_data_addr(ipage);
 
-       f2fs_wait_on_page_writeback(ipage, NODE);
+       f2fs_wait_on_page_writeback(ipage, NODE, true);
        memset(addr + from, 0, MAX_INLINE_DATA - from);
 
        return true;
@@ -124,8 +124,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
        if (err)
                return err;
 
-       f2fs_wait_on_page_writeback(page, DATA);
-
+       f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
        if (PageUptodate(page))
                goto no_update;
 
@@ -150,7 +149,7 @@ no_update:
        write_data_page(dn, &fio);
        set_data_blkaddr(dn);
        f2fs_update_extent_cache(dn);
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
        if (dirty)
                inode_dec_dirty_pages(dn->inode);
 
@@ -159,6 +158,7 @@ no_update:
 
        /* clear inline data and flag after data writeback */
        truncate_inline_inode(dn->inode_page, 0);
+       clear_inline_node(dn->inode_page);
 clear_out:
        stat_dec_inline_inode(dn->inode);
        f2fs_clear_inline_inode(dn->inode);
@@ -223,7 +223,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
 
        f2fs_bug_on(F2FS_I_SB(inode), page->index);
 
-       f2fs_wait_on_page_writeback(dn.inode_page, NODE);
+       f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
        src_addr = kmap_atomic(page);
        dst_addr = inline_data_addr(dn.inode_page);
        memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
@@ -233,6 +233,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
        set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
 
        sync_inode_page(&dn);
+       clear_inline_node(dn.inode_page);
        f2fs_put_dnode(&dn);
        return 0;
 }
@@ -261,7 +262,7 @@ process_inline:
                ipage = get_node_page(sbi, inode->i_ino);
                f2fs_bug_on(sbi, IS_ERR(ipage));
 
-               f2fs_wait_on_page_writeback(ipage, NODE);
+               f2fs_wait_on_page_writeback(ipage, NODE, true);
 
                src_addr = inline_data_addr(npage);
                dst_addr = inline_data_addr(ipage);
@@ -389,7 +390,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
        if (err)
                goto out;
 
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
        zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
 
        dentry_blk = kmap_atomic(page);
@@ -469,7 +470,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
                }
        }
 
-       f2fs_wait_on_page_writeback(ipage, NODE);
+       f2fs_wait_on_page_writeback(ipage, NODE, true);
 
        name_hash = f2fs_dentry_hash(name);
        make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
@@ -507,7 +508,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
        int i;
 
        lock_page(page);
-       f2fs_wait_on_page_writeback(page, NODE);
+       f2fs_wait_on_page_writeback(page, NODE, true);
 
        inline_dentry = inline_data_addr(page);
        bit_pos = dentry - inline_dentry->dentry;
index 2adeff26be11b9689dde24a5d22b0bd3351c21b7..60e3b3078b81469b300fca0d4bf44b2117f709bf 100644 (file)
@@ -83,7 +83,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
 
        while (start < end) {
                if (*start++) {
-                       f2fs_wait_on_page_writeback(ipage, NODE);
+                       f2fs_wait_on_page_writeback(ipage, NODE, true);
 
                        set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
                        set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage));
@@ -227,7 +227,7 @@ int update_inode(struct inode *inode, struct page *node_page)
 {
        struct f2fs_inode *ri;
 
-       f2fs_wait_on_page_writeback(node_page, NODE);
+       f2fs_wait_on_page_writeback(node_page, NODE, true);
 
        ri = F2FS_INODE(node_page);
 
@@ -263,6 +263,10 @@ int update_inode(struct inode *inode, struct page *node_page)
        set_cold_node(inode, node_page);
        clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
 
+       /* deleted inode */
+       if (inode->i_nlink == 0)
+               clear_inline_node(node_page);
+
        return set_page_dirty(node_page);
 }
 
index 342597a5897f059a2d31823923d8664861b7b969..150907ffa7aaf871772488c880d98ce3d86620c5 100644 (file)
@@ -403,14 +403,45 @@ cache:
        up_write(&nm_i->nat_tree_lock);
 }
 
+pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
+{
+       const long direct_index = ADDRS_PER_INODE(dn->inode);
+       const long direct_blks = ADDRS_PER_BLOCK;
+       const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+       unsigned int skipped_unit = ADDRS_PER_BLOCK;
+       int cur_level = dn->cur_level;
+       int max_level = dn->max_level;
+       pgoff_t base = 0;
+
+       if (!dn->max_level)
+               return pgofs + 1;
+
+       while (max_level-- > cur_level)
+               skipped_unit *= NIDS_PER_BLOCK;
+
+       switch (dn->max_level) {
+       case 3:
+               base += 2 * indirect_blks;
+       case 2:
+               base += 2 * direct_blks;
+       case 1:
+               base += direct_index;
+               break;
+       default:
+               f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
+       }
+
+       return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
+}
+
 /*
  * The maximum depth is four.
  * Offset[0] will have raw inode offset.
  */
-static int get_node_path(struct f2fs_inode_info *fi, long block,
+static int get_node_path(struct inode *inode, long block,
                                int offset[4], unsigned int noffset[4])
 {
-       const long direct_index = ADDRS_PER_INODE(fi);
+       const long direct_index = ADDRS_PER_INODE(inode);
        const long direct_blks = ADDRS_PER_BLOCK;
        const long dptrs_per_blk = NIDS_PER_BLOCK;
        const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
@@ -495,10 +526,10 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
        int offset[4];
        unsigned int noffset[4];
        nid_t nids[4];
-       int level, i;
+       int level, i = 0;
        int err = 0;
 
-       level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
+       level = get_node_path(dn->inode, index, offset, noffset);
 
        nids[0] = dn->inode->i_ino;
        npage[0] = dn->inode_page;
@@ -585,6 +616,10 @@ release_pages:
 release_out:
        dn->inode_page = NULL;
        dn->node_page = NULL;
+       if (err == -ENOENT) {
+               dn->cur_level = i;
+               dn->max_level = level;
+       }
        return err;
 }
 
@@ -792,7 +827,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
 
        trace_f2fs_truncate_inode_blocks_enter(inode, from);
 
-       level = get_node_path(F2FS_I(inode), from, offset, noffset);
+       level = get_node_path(inode, from, offset, noffset);
 restart:
        page = get_node_page(sbi, inode->i_ino);
        if (IS_ERR(page)) {
@@ -861,7 +896,7 @@ skip_partial:
                                f2fs_put_page(page, 1);
                                goto restart;
                        }
-                       f2fs_wait_on_page_writeback(page, NODE);
+                       f2fs_wait_on_page_writeback(page, NODE, true);
                        ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
                        set_page_dirty(page);
                        unlock_page(page);
@@ -976,7 +1011,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
        new_ni.ino = dn->inode->i_ino;
        set_node_addr(sbi, &new_ni, NEW_ADDR, false);
 
-       f2fs_wait_on_page_writeback(page, NODE);
+       f2fs_wait_on_page_writeback(page, NODE, true);
        fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
        set_cold_node(dn->inode, page);
        SetPageUptodate(page);
@@ -1154,6 +1189,39 @@ void sync_inode_page(struct dnode_of_data *dn)
        dn->node_changed = ret ? true: false;
 }
 
+static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
+{
+       struct inode *inode;
+       struct page *page;
+
+       /* should flush inline_data before evict_inode */
+       inode = ilookup(sbi->sb, ino);
+       if (!inode)
+               return;
+
+       page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0);
+       if (!page)
+               goto iput_out;
+
+       if (!PageUptodate(page))
+               goto page_out;
+
+       if (!PageDirty(page))
+               goto page_out;
+
+       if (!clear_page_dirty_for_io(page))
+               goto page_out;
+
+       if (!f2fs_write_inline_data(inode, page))
+               inode_dec_dirty_pages(inode);
+       else
+               set_page_dirty(page);
+page_out:
+       f2fs_put_page(page, 1);
+iput_out:
+       iput(inode);
+}
+
 int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
                                        struct writeback_control *wbc)
 {
@@ -1221,6 +1289,17 @@ continue_unlock:
                                goto continue_unlock;
                        }
 
+                       /* flush inline_data */
+                       if (!ino && is_inline_node(page)) {
+                               clear_inline_node(page);
+                               unlock_page(page);
+                               flush_inline_data(sbi, ino_of_node(page));
+                               continue;
+                       }
+
+                       f2fs_wait_on_page_writeback(page, NODE, true);
+
+                       BUG_ON(PageWriteback(page));
                        if (!clear_page_dirty_for_io(page))
                                goto continue_unlock;
 
@@ -1258,8 +1337,13 @@ continue_unlock:
                goto next_step;
        }
 
-       if (wrote)
-               f2fs_submit_merged_bio(sbi, NODE, WRITE);
+       if (wrote) {
+               if (ino)
+                       f2fs_submit_merged_bio_cond(sbi, NULL, NULL,
+                                                       ino, NODE, WRITE);
+               else
+                       f2fs_submit_merged_bio(sbi, NODE, WRITE);
+       }
        return nwritten;
 }
 
@@ -1287,7 +1371,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
                                continue;
 
                        if (ino && ino_of_node(page) == ino) {
-                               f2fs_wait_on_page_writeback(page, NODE);
+                               f2fs_wait_on_page_writeback(page, NODE, true);
                                if (TestClearPageError(page))
                                        ret = -EIO;
                        }
@@ -1326,8 +1410,6 @@ static int f2fs_write_node_page(struct page *page,
        if (unlikely(f2fs_cp_error(sbi)))
                goto redirty_out;
 
-       f2fs_wait_on_page_writeback(page, NODE);
-
        /* get old block addr of this node page */
        nid = nid_of_node(page);
        f2fs_bug_on(sbi, page->index != nid);
@@ -1356,9 +1438,13 @@ static int f2fs_write_node_page(struct page *page,
        set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
        dec_page_count(sbi, F2FS_DIRTY_NODES);
        up_read(&sbi->node_write);
+
+       if (wbc->for_reclaim)
+               f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE);
+
        unlock_page(page);
 
-       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
+       if (unlikely(f2fs_cp_error(sbi)))
                f2fs_submit_merged_bio(sbi, NODE, WRITE);
 
        return 0;
@@ -1374,8 +1460,6 @@ static int f2fs_write_node_pages(struct address_space *mapping,
        struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
        long diff;
 
-       trace_f2fs_writepages(mapping->host, wbc, NODE);
-
        /* balancing f2fs's metadata in background */
        f2fs_balance_fs_bg(sbi);
 
@@ -1383,6 +1467,8 @@ static int f2fs_write_node_pages(struct address_space *mapping,
        if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
                goto skip_write;
 
+       trace_f2fs_writepages(mapping->host, wbc, NODE);
+
        diff = nr_pages_to_write(sbi, NODE, wbc);
        wbc->sync_mode = WB_SYNC_NONE;
        sync_node_pages(sbi, 0, wbc);
@@ -1391,6 +1477,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
 
 skip_write:
        wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
+       trace_f2fs_writepages(mapping->host, wbc, NODE);
        return 0;
 }
 
@@ -1703,7 +1790,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
        src_addr = inline_xattr_addr(page);
        inline_size = inline_xattr_size(inode);
 
-       f2fs_wait_on_page_writeback(ipage, NODE);
+       f2fs_wait_on_page_writeback(ipage, NODE, true);
        memcpy(dst_addr, src_addr, inline_size);
 update_inode:
        update_inode(inode, ipage);
@@ -2000,6 +2087,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
        nm_i->nat_cnt = 0;
        nm_i->ram_thresh = DEF_RAM_THRESHOLD;
        nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
+       nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
 
        INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
        INIT_LIST_HEAD(&nm_i->free_nid_list);
index d4d1f636fe1c36c923ebcd68da46249161ccafc8..1f4f9d4569d9cb4f8a6ed93ff112fa300cd49444 100644 (file)
@@ -25,6 +25,9 @@
 /* control the memory footprint threshold (10MB per 1GB ram) */
 #define DEF_RAM_THRESHOLD      10
 
+/* control dirty nats ratio threshold (default: 10% over max nid count) */
+#define DEF_DIRTY_NAT_RATIO_THRESHOLD          10
+
 /* vector size for gang look-up from nat cache that consists of radix tree */
 #define NATVEC_SIZE    64
 #define SETVEC_SIZE    32
@@ -117,6 +120,12 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
        raw_ne->version = ni->version;
 }
 
+static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
+{
+       return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid *
+                                       NM_I(sbi)->dirty_nats_ratio / 100;
+}
+
 enum mem_type {
        FREE_NIDS,      /* indicates the free nid list */
        NAT_ENTRIES,    /* indicates the cached nat entry */
@@ -321,7 +330,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
 {
        struct f2fs_node *rn = F2FS_NODE(p);
 
-       f2fs_wait_on_page_writeback(p, NODE);
+       f2fs_wait_on_page_writeback(p, NODE, true);
 
        if (i)
                rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
@@ -370,6 +379,21 @@ static inline int is_node(struct page *page, int type)
 #define is_fsync_dnode(page)   is_node(page, FSYNC_BIT_SHIFT)
 #define is_dent_dnode(page)    is_node(page, DENT_BIT_SHIFT)
 
+static inline int is_inline_node(struct page *page)
+{
+       return PageChecked(page);
+}
+
+static inline void set_inline_node(struct page *page)
+{
+       SetPageChecked(page);
+}
+
+static inline void clear_inline_node(struct page *page)
+{
+       ClearPageChecked(page);
+}
+
 static inline void set_cold_node(struct inode *inode, struct page *page)
 {
        struct f2fs_node *rn = F2FS_NODE(page);
index 589b20b8677b8cc1dcf3d16a6f7e0943ce0283eb..5045dd6a27e96c9a890e3e885ac01947c4914c53 100644 (file)
@@ -350,8 +350,7 @@ got_it:
                inode = dn->inode;
        }
 
-       bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
-                       le16_to_cpu(sum.ofs_in_node);
+       bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
 
        /*
         * if inode page is locked, unlock temporarily, but its reference
@@ -386,10 +385,9 @@ truncate_out:
 static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
                                        struct page *page, block_t blkaddr)
 {
-       struct f2fs_inode_info *fi = F2FS_I(inode);
-       unsigned int start, end;
        struct dnode_of_data dn;
        struct node_info ni;
+       unsigned int start, end;
        int err = 0, recovered = 0;
 
        /* step 1: recover xattr */
@@ -409,8 +407,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
                goto out;
 
        /* step 3: recover data indices */
-       start = start_bidx_of_node(ofs_of_node(page), fi);
-       end = start + ADDRS_PER_PAGE(page, fi);
+       start = start_bidx_of_node(ofs_of_node(page), inode);
+       end = start + ADDRS_PER_PAGE(page, inode);
 
        set_new_dnode(&dn, inode, NULL, NULL, 0);
 
@@ -418,7 +416,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
        if (err)
                goto out;
 
-       f2fs_wait_on_page_writeback(dn.node_page, NODE);
+       f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
 
        get_node_info(sbi, dn.nid, &ni);
        f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
index 5904a411c86fe8d4feb70f695809273b3a46bd6a..57a5f7bb275ae21b0935e1d8f89e83624bf06117 100644 (file)
@@ -223,7 +223,8 @@ int commit_inmem_pages(struct inode *inode, bool abort)
                if (!abort) {
                        if (cur->page->mapping == inode->i_mapping) {
                                set_page_dirty(cur->page);
-                               f2fs_wait_on_page_writeback(cur->page, DATA);
+                               f2fs_wait_on_page_writeback(cur->page, DATA,
+                                                                       true);
                                if (clear_page_dirty_for_io(cur->page))
                                        inode_dec_dirty_pages(inode);
                                trace_f2fs_commit_inmem_page(cur->page, INMEM);
@@ -253,7 +254,8 @@ int commit_inmem_pages(struct inode *inode, bool abort)
        if (!abort) {
                f2fs_unlock_op(sbi);
                if (submit_bio)
-                       f2fs_submit_merged_bio(sbi, DATA, WRITE);
+                       f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0,
+                                                               DATA, WRITE);
        }
        return err;
 }
@@ -291,8 +293,9 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 
        /* checkpoint is the only way to shrink partial cached entries */
        if (!available_free_memory(sbi, NAT_ENTRIES) ||
-                       excess_prefree_segs(sbi) ||
                        !available_free_memory(sbi, INO_ENTRIES) ||
+                       excess_prefree_segs(sbi) ||
+                       excess_dirty_nats(sbi) ||
                        (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
                if (test_opt(sbi, DATA_FLUSH))
                        sync_dirty_inodes(sbi, FILE_INODE);
@@ -873,9 +876,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
 
        if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
                segno = find_next_zero_bit(free_i->free_segmap,
-                                       MAIN_SEGS(sbi), *newseg + 1);
-               if (segno - *newseg < sbi->segs_per_sec -
-                                       (*newseg % sbi->segs_per_sec))
+                               (hint + 1) * sbi->segs_per_sec, *newseg + 1);
+               if (segno < (hint + 1) * sbi->segs_per_sec)
                        goto got_it;
        }
 find_other_zone:
@@ -1415,53 +1417,17 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
        f2fs_update_extent_cache(dn);
 }
 
-static inline bool is_merged_page(struct f2fs_sb_info *sbi,
-                                       struct page *page, enum page_type type)
-{
-       enum page_type btype = PAGE_TYPE_OF_BIO(type);
-       struct f2fs_bio_info *io = &sbi->write_io[btype];
-       struct bio_vec *bvec;
-       struct page *target;
-       int i;
-
-       down_read(&io->io_rwsem);
-       if (!io->bio) {
-               up_read(&io->io_rwsem);
-               return false;
-       }
-
-       bio_for_each_segment_all(bvec, io->bio, i) {
-
-               if (bvec->bv_page->mapping) {
-                       target = bvec->bv_page;
-               } else {
-                       struct f2fs_crypto_ctx *ctx;
-
-                       /* encrypted page */
-                       ctx = (struct f2fs_crypto_ctx *)page_private(
-                                                               bvec->bv_page);
-                       target = ctx->w.control_page;
-               }
-
-               if (page == target) {
-                       up_read(&io->io_rwsem);
-                       return true;
-               }
-       }
-
-       up_read(&io->io_rwsem);
-       return false;
-}
-
 void f2fs_wait_on_page_writeback(struct page *page,
-                               enum page_type type)
+                               enum page_type type, bool ordered)
 {
        if (PageWriteback(page)) {
                struct f2fs_sb_info *sbi = F2FS_P_SB(page);
 
-               if (is_merged_page(sbi, page, type))
-                       f2fs_submit_merged_bio(sbi, type, WRITE);
-               wait_on_page_writeback(page);
+               f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE);
+               if (ordered)
+                       wait_on_page_writeback(page);
+               else
+                       wait_for_stable_page(page);
        }
 }
 
@@ -1477,7 +1443,7 @@ void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
 
        cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
        if (cpage) {
-               f2fs_wait_on_page_writeback(cpage, DATA);
+               f2fs_wait_on_page_writeback(cpage, DATA, true);
                f2fs_put_page(cpage, 1);
        }
 }
index ee44d346ea44143e5610c59c3d0ed66c88526c43..cd7111b9a664f542865e1272b25554443ac90c5a 100644 (file)
@@ -183,7 +183,7 @@ struct segment_allocation {
  * this value is set in page as a private data which indicate that
  * the page is atomically written, and it is in inmem_pages list.
  */
-#define ATOMIC_WRITTEN_PAGE            0x0000ffff
+#define ATOMIC_WRITTEN_PAGE            ((unsigned long)-1)
 
 #define IS_ATOMIC_WRITTEN_PAGE(page)                   \
                (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
index 6134832baaaf0b25a86ed9cec1e8121b3ba3e4e2..9445a34b8d48c184ecf0762b65dc2096896d2885 100644 (file)
@@ -126,6 +126,19 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
        return NULL;
 }
 
+static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
+               struct f2fs_sb_info *sbi, char *buf)
+{
+       struct super_block *sb = sbi->sb;
+
+       if (!sb->s_bdev->bd_part)
+               return snprintf(buf, PAGE_SIZE, "0\n");
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+               (unsigned long long)(sbi->kbytes_written +
+                       BD_PART_WRITTEN(sbi)));
+}
+
 static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
                        struct f2fs_sb_info *sbi, char *buf)
 {
@@ -204,6 +217,9 @@ static struct f2fs_attr f2fs_attr_##_name = {                       \
                f2fs_sbi_show, f2fs_sbi_store,                  \
                offsetof(struct struct_name, elname))
 
+#define F2FS_GENERAL_RO_ATTR(name) \
+static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
+
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
@@ -216,10 +232,12 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -237,8 +255,10 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(dir_level),
        ATTR_LIST(ram_thresh),
        ATTR_LIST(ra_nid_pages),
+       ATTR_LIST(dirty_nats_ratio),
        ATTR_LIST(cp_interval),
        ATTR_LIST(idle_interval),
+       ATTR_LIST(lifetime_write_kbytes),
        NULL,
 };
 
@@ -562,6 +582,13 @@ static void f2fs_put_super(struct super_block *sb)
        f2fs_leave_shrinker(sbi);
        mutex_unlock(&sbi->umount_mutex);
 
+       /* our cp_error case, we can wait for any writeback page */
+       if (get_pages(sbi, F2FS_WRITEBACK)) {
+               f2fs_submit_merged_bio(sbi, DATA, WRITE);
+               f2fs_submit_merged_bio(sbi, NODE, WRITE);
+               f2fs_submit_merged_bio(sbi, META, WRITE);
+       }
+
        iput(sbi->node_inode);
        iput(sbi->meta_inode);
 
@@ -766,8 +793,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        bool need_stop_gc = false;
        bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
 
-       sync_filesystem(sb);
-
        /*
         * Save the old mount options in case we
         * need to restore them.
@@ -775,6 +800,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        org_mount_opt = sbi->mount_opt;
        active_logs = sbi->active_logs;
 
+       if (*flags & MS_RDONLY) {
+               set_opt(sbi, FASTBOOT);
+               set_sbi_flag(sbi, SBI_IS_DIRTY);
+       }
+
+       sync_filesystem(sb);
+
        sbi->mount_opt.opt = 0;
        default_options(sbi);
 
@@ -1242,6 +1274,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
        bool retry = true, need_fsck = false;
        char *options = NULL;
        int recovery, i, valid_super_block;
+       struct curseg_info *seg_i;
 
 try_onemore:
        err = -EINVAL;
@@ -1372,6 +1405,17 @@ try_onemore:
                goto free_nm;
        }
 
+       /* For write statistics */
+       if (sb->s_bdev->bd_part)
+               sbi->sectors_written_start =
+                       (u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+
+       /* Read accumulated write IO statistics if exists */
+       seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
+       if (__exist_node_summaries(sbi))
+               sbi->kbytes_written =
+                       le64_to_cpu(seg_i->sum_blk->info.kbytes_written);
+
        build_gc_manager(sbi);
 
        /* get an inode for node space */
index 10f1e784fa2390148aaa37f527526162be2d724d..06a72dc0191a022049bbf48ad26e127bb6211c02 100644 (file)
@@ -300,7 +300,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
 
                if (ipage) {
                        inline_addr = inline_xattr_addr(ipage);
-                       f2fs_wait_on_page_writeback(ipage, NODE);
+                       f2fs_wait_on_page_writeback(ipage, NODE, true);
                } else {
                        page = get_node_page(sbi, inode->i_ino);
                        if (IS_ERR(page)) {
@@ -308,7 +308,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
                                return PTR_ERR(page);
                        }
                        inline_addr = inline_xattr_addr(page);
-                       f2fs_wait_on_page_writeback(page, NODE);
+                       f2fs_wait_on_page_writeback(page, NODE, true);
                }
                memcpy(inline_addr, txattr_addr, inline_size);
                f2fs_put_page(page, 1);
@@ -329,7 +329,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
                        return PTR_ERR(xpage);
                }
                f2fs_bug_on(sbi, new_nid);
-               f2fs_wait_on_page_writeback(xpage, NODE);
+               f2fs_wait_on_page_writeback(xpage, NODE, true);
        } else {
                struct dnode_of_data dn;
                set_new_dnode(&dn, inode, NULL, NULL, new_nid);
index e59c3be921069635c90da255087b12978bf70c9a..f43e6a01a0236ed30674f778d48c6c78a8c2e938 100644 (file)
@@ -21,7 +21,7 @@
 #define F2FS_BLKSIZE                   4096    /* support only 4KB block */
 #define F2FS_BLKSIZE_BITS              12      /* bits for F2FS_BLKSIZE */
 #define F2FS_MAX_EXTENSION             64      /* # of extension entries */
-#define F2FS_BLK_ALIGN(x)      (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
+#define F2FS_BLK_ALIGN(x)      (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS)
 
 #define NULL_ADDR              ((block_t)0)    /* used as block_t addresses */
 #define NEW_ADDR               ((block_t)-1)   /* used as block_t addresses */
@@ -170,12 +170,12 @@ struct f2fs_extent {
 #define F2FS_INLINE_XATTR_ADDRS        50      /* 200 bytes for inline xattrs */
 #define DEF_ADDRS_PER_INODE    923     /* Address Pointers in an Inode */
 #define DEF_NIDS_PER_INODE     5       /* Node IDs in an Inode */
-#define ADDRS_PER_INODE(fi)    addrs_per_inode(fi)
+#define ADDRS_PER_INODE(inode) addrs_per_inode(inode)
 #define ADDRS_PER_BLOCK                1018    /* Address Pointers in a Direct Block */
 #define NIDS_PER_BLOCK         1018    /* Node IDs in an Indirect Block */
 
-#define ADDRS_PER_PAGE(page, fi)       \
-       (IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK)
+#define ADDRS_PER_PAGE(page, inode)    \
+       (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK)
 
 #define        NODE_DIR1_BLOCK         (DEF_ADDRS_PER_INODE + 1)
 #define        NODE_DIR2_BLOCK         (DEF_ADDRS_PER_INODE + 2)
@@ -345,7 +345,7 @@ struct f2fs_summary {
 
 struct summary_footer {
        unsigned char entry_type;       /* SUM_TYPE_XXX */
-       __u32 check_sum;                /* summary checksum */
+       __le32 check_sum;               /* summary checksum */
 } __packed;
 
 #define SUM_JOURNAL_SIZE       (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\
@@ -358,6 +358,12 @@ struct summary_footer {
                                sizeof(struct sit_journal_entry))
 #define SIT_JOURNAL_RESERVED   ((SUM_JOURNAL_SIZE - 2) %\
                                sizeof(struct sit_journal_entry))
+
+/* Reserved area should make the size of f2fs_extra_info equal to
+ * that of nat_journal and sit_journal.
+ */
+#define EXTRA_INFO_RESERVED    (SUM_JOURNAL_SIZE - 2 - 8)
+
 /*
  * frequently updated NAT/SIT entries can be stored in the spare area in
  * summary blocks
@@ -387,6 +393,11 @@ struct sit_journal {
        __u8 reserved[SIT_JOURNAL_RESERVED];
 } __packed;
 
+struct f2fs_extra_info {
+       __le64 kbytes_written;
+       __u8 reserved[EXTRA_INFO_RESERVED];
+} __packed;
+
 /* 4KB-sized summary block structure */
 struct f2fs_summary_block {
        struct f2fs_summary entries[ENTRIES_IN_SUM];
@@ -394,10 +405,11 @@ struct f2fs_summary_block {
                __le16 n_nats;
                __le16 n_sits;
        };
-       /* spare area is used by NAT or SIT journals */
+       /* spare area is used by NAT or SIT journals or extra info */
        union {
                struct nat_journal nat_j;
                struct sit_journal sit_j;
+               struct f2fs_extra_info info;
        };
        struct summary_footer footer;
 } __packed;