Merge remote-tracking branch 'f2fs/dev'

author Stephen Rothwell <sfr@canb.auug.org.au>

Thu, 11 Feb 2016 00:05:51 +0000 (11:05 +1100)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Thu, 11 Feb 2016 00:05:51 +0000 (11:05 +1100)
author Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 11 Feb 2016 00:05:51 +0000 (11:05 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 11 Feb 2016 00:05:51 +0000 (11:05 +1100)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs

index e5200f354abfe933d3fbe69f919da2dcc0e585a6..a809f6005f1464ed7c86133db7b4b95d4a3c7388 100644 (file)
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -98,3 +98,17 @@ Date:                October 2015
  Contact:       "Chao Yu" <chao2.yu@samsung.com>
  Description:
                  Controls the count of nid pages to be readaheaded.
+
+What:          /sys/fs/f2fs/<disk>/dirty_nats_ratio
+Date:          January 2016
+Contact:       "Chao Yu" <chao2.yu@samsung.com>
+Description:
+                Controls the dirty nat entries ratio threshold. If the
+                current ratio exceeds the configured threshold, a
+                checkpoint is triggered to flush dirty nat entries.
+
+What:          /sys/fs/f2fs/<disk>/lifetime_write_kbytes
+Date:          January 2016
+Contact:       "Shuoran Liu" <liushuoran@huawei.com>
+Description:
+                Shows total written kbytes issued to disk.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c

index 3842af954cd5bc127f2f0aad13d0ff52b743779d..536bec99bd64d39570edd0f08ac5545e79ade5a2 100644 (file)
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -39,7 +39,7 @@ repeat:
                 cond_resched();
                 goto repeat;
         }
-       f2fs_wait_on_page_writeback(page, META);
+       f2fs_wait_on_page_writeback(page, META, true);
         SetPageUptodate(page);
         return page;
  }
@@ -232,13 +232,17 @@ static int f2fs_write_meta_page(struct page *page,
         if (unlikely(f2fs_cp_error(sbi)))
                 goto redirty_out;
  
-       f2fs_wait_on_page_writeback(page, META);
         write_meta_page(sbi, page);
         dec_page_count(sbi, F2FS_DIRTY_META);
+
+       if (wbc->for_reclaim)
+               f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
+
         unlock_page(page);
  
-       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
+       if (unlikely(f2fs_cp_error(sbi)))
                 f2fs_submit_merged_bio(sbi, META, WRITE);
+
         return 0;
  
  redirty_out:
@@ -252,13 +256,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
         long diff, written;
  
-       trace_f2fs_writepages(mapping->host, wbc, META);
-
         /* collect a number of dirty meta pages and write together */
         if (wbc->for_kupdate ||
                 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
                 goto skip_write;
  
+       trace_f2fs_writepages(mapping->host, wbc, META);
+
         /* if mounting is failed, skip writing node pages */
         mutex_lock(&sbi->cp_mutex);
         diff = nr_pages_to_write(sbi, META, wbc);
@@ -269,6 +273,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
  
  skip_write:
         wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
+       trace_f2fs_writepages(mapping->host, wbc, META);
         return 0;
  }
  
@@ -315,6 +320,9 @@ continue_unlock:
                                 goto continue_unlock;
                         }
  
+                       f2fs_wait_on_page_writeback(page, META, true);
+
+                       BUG_ON(PageWriteback(page));
                         if (!clear_page_dirty_for_io(page))
                                 goto continue_unlock;
  
@@ -921,6 +929,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         int cp_payload_blks = __cp_payload(sbi);
         block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
         bool invalidate = false;
+       struct super_block *sb = sbi->sb;
+       struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
+       u64 kbytes_written;
  
         /*
          * This avoids to conduct wrong roll-forward operations and uses
@@ -1034,6 +1045,14 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  
         write_data_summaries(sbi, start_blk);
         start_blk += data_sum_blocks;
+
+       /* Record write statistics in the hot node summary */
+       kbytes_written = sbi->kbytes_written;
+       if (sb->s_bdev->bd_part)
+               kbytes_written += BD_PART_WRITTEN(sbi);
+
+       seg_i->sum_blk->info.kbytes_written = cpu_to_le64(kbytes_written);
+
         if (__remain_node_summaries(cpc->reason)) {
                 write_node_summaries(sbi, start_blk);
                 start_blk += NR_CURSEG_NODE_TYPE;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c

index 5c06db17e41fa267f5b270061d2959b2a36803e4..03f948e84115df85ea68faadb856c7c3eef3383c 100644 (file)
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -67,7 +67,6 @@ static void f2fs_write_end_io(struct bio *bio)
                 f2fs_restore_and_release_control_page(&page);
  
                 if (unlikely(bio->bi_error)) {
-                       set_page_dirty(page);
                         set_bit(AS_EIO, &page->mapping->flags);
                         f2fs_stop_checkpoint(sbi);
                 }
@@ -75,8 +74,7 @@ static void f2fs_write_end_io(struct bio *bio)
                 dec_page_count(sbi, F2FS_WRITEBACK);
         }
  
-       if (!get_pages(sbi, F2FS_WRITEBACK) &&
-                       !list_empty(&sbi->cp_wait.task_list))
+       if (!get_pages(sbi, F2FS_WRITEBACK) && wq_has_sleeper(&sbi->cp_wait))
                 wake_up(&sbi->cp_wait);
  
         bio_put(bio);
@@ -116,8 +114,60 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
         io->bio = NULL;
  }
  
-void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
-                               enum page_type type, int rw)
+static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
+                                               struct page *page, nid_t ino)
+{
+       struct bio_vec *bvec;
+       struct page *target;
+       int i;
+
+       if (!io->bio)
+               return false;
+
+       if (!inode && !page && !ino)
+               return true;
+
+       bio_for_each_segment_all(bvec, io->bio, i) {
+
+               if (bvec->bv_page->mapping) {
+                       target = bvec->bv_page;
+               } else {
+                       struct f2fs_crypto_ctx *ctx;
+
+                       /* encrypted page */
+                       ctx = (struct f2fs_crypto_ctx *)page_private(
+                                                               bvec->bv_page);
+                       target = ctx->w.control_page;
+               }
+
+               if (inode && inode == target->mapping->host)
+                       return true;
+               if (page && page == target)
+                       return true;
+               if (ino && ino == ino_of_node(target))
+                       return true;
+       }
+
+       return false;
+}
+
+static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
+                                               struct page *page, nid_t ino,
+                                               enum page_type type)
+{
+       enum page_type btype = PAGE_TYPE_OF_BIO(type);
+       struct f2fs_bio_info *io = &sbi->write_io[btype];
+       bool ret;
+
+       down_read(&io->io_rwsem);
+       ret = __has_merged_page(io, inode, page, ino);
+       up_read(&io->io_rwsem);
+       return ret;
+}
+
+static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
+                               struct inode *inode, struct page *page,
+                               nid_t ino, enum page_type type, int rw)
  {
         enum page_type btype = PAGE_TYPE_OF_BIO(type);
         struct f2fs_bio_info *io;
@@ -126,6 +176,9 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
  
         down_write(&io->io_rwsem);
  
+       if (!__has_merged_page(io, inode, page, ino))
+               goto out;
+
         /* change META to META_FLUSH in the checkpoint procedure */
         if (type >= META_FLUSH) {
                 io->fio.type = META_FLUSH;
@@ -135,9 +188,24 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
                         io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
         }
         __submit_merged_bio(io);
+out:
         up_write(&io->io_rwsem);
  }
  
+void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
+                                                                       int rw)
+{
+       __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw);
+}
+
+void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+                               struct inode *inode, struct page *page,
+                               nid_t ino, enum page_type type, int rw)
+{
+       if (has_merged_page(sbi, inode, page, ino, type))
+               __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw);
+}
+
  /*
   * Fill the locked page with data located in the block address.
   * Return unlocked page.
@@ -218,7 +286,7 @@ void set_data_blkaddr(struct dnode_of_data *dn)
         struct page *node_page = dn->node_page;
         unsigned int ofs_in_node = dn->ofs_in_node;
  
-       f2fs_wait_on_page_writeback(node_page, NODE);
+       f2fs_wait_on_page_writeback(node_page, NODE, true);
  
         rn = F2FS_NODE(node_page);
  
@@ -461,7 +529,6 @@ got_it:
  static int __allocate_data_block(struct dnode_of_data *dn)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
-       struct f2fs_inode_info *fi = F2FS_I(dn->inode);
         struct f2fs_summary sum;
         struct node_info ni;
         int seg = CURSEG_WARM_DATA;
@@ -489,7 +556,7 @@ alloc:
         set_data_blkaddr(dn);
  
         /* update i_size */
-       fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+       fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
                                                         dn->ofs_in_node;
         if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
                 i_size_write(dn->inode,
@@ -497,67 +564,33 @@ alloc:
         return 0;
  }
  
-static int __allocate_data_blocks(struct inode *inode, loff_t offset,
-                                                       size_t count)
+ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
  {
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct dnode_of_data dn;
-       u64 start = F2FS_BYTES_TO_BLK(offset);
-       u64 len = F2FS_BYTES_TO_BLK(count);
-       bool allocated;
-       u64 end_offset;
-       int err = 0;
-
-       while (len) {
-               f2fs_lock_op(sbi);
-
-               /* When reading holes, we need its node page */
-               set_new_dnode(&dn, inode, NULL, NULL, 0);
-               err = get_dnode_of_data(&dn, start, ALLOC_NODE);
-               if (err)
-                       goto out;
-
-               allocated = false;
-               end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
-
-               while (dn.ofs_in_node < end_offset && len) {
-                       block_t blkaddr;
-
-                       if (unlikely(f2fs_cp_error(sbi))) {
-                               err = -EIO;
-                               goto sync_out;
-                       }
-
-                       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
-                       if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
-                               err = __allocate_data_block(&dn);
-                               if (err)
-                                       goto sync_out;
-                               allocated = true;
-                       }
-                       len--;
-                       start++;
-                       dn.ofs_in_node++;
-               }
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct f2fs_map_blocks map;
+       ssize_t ret = 0;
  
-               if (allocated)
-                       sync_inode_page(&dn);
+       map.m_lblk = F2FS_BYTES_TO_BLK(iocb->ki_pos);
+       map.m_len = F2FS_BLK_ALIGN(iov_iter_count(from));
+       map.m_next_pgofs = NULL;
  
-               f2fs_put_dnode(&dn);
-               f2fs_unlock_op(sbi);
+       if (f2fs_encrypted_inode(inode))
+               return 0;
  
-               f2fs_balance_fs(sbi, dn.node_changed);
+       if (iocb->ki_flags & IOCB_DIRECT) {
+               ret = f2fs_convert_inline_inode(inode);
+               if (ret)
+                       return ret;
+               return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
         }
-       return err;
-
-sync_out:
-       if (allocated)
-               sync_inode_page(&dn);
-       f2fs_put_dnode(&dn);
-out:
-       f2fs_unlock_op(sbi);
-       f2fs_balance_fs(sbi, dn.node_changed);
-       return err;
+       if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
+               ret = f2fs_convert_inline_inode(inode);
+               if (ret)
+                       return ret;
+       }
+       if (!f2fs_has_inline_data(inode))
+               return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+       return ret;
  }
  
  /*
@@ -588,13 +621,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
         /* it only supports block size == page size */
         pgofs = (pgoff_t)map->m_lblk;
  
-       if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+       if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
                 map->m_pblk = ei.blk + pgofs - ei.fofs;
                 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
                 map->m_flags = F2FS_MAP_MAPPED;
                 goto out;
         }
  
+next_dnode:
         if (create)
                 f2fs_lock_op(sbi);
  
@@ -602,120 +636,98 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
         set_new_dnode(&dn, inode, NULL, NULL, 0);
         err = get_dnode_of_data(&dn, pgofs, mode);
         if (err) {
-               if (err == -ENOENT)
+               if (err == -ENOENT) {
                         err = 0;
+                       if (map->m_next_pgofs)
+                               *map->m_next_pgofs =
+                                       get_next_page_offset(&dn, pgofs);
+               }
                 goto unlock_out;
         }
  
-       if (dn.data_blkaddr == NEW_ADDR || dn.data_blkaddr == NULL_ADDR) {
+       end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+
+next_block:
+       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+
+       if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
                 if (create) {
                         if (unlikely(f2fs_cp_error(sbi))) {
                                 err = -EIO;
-                               goto put_out;
+                               goto sync_out;
+                       }
+                       if (flag == F2FS_GET_BLOCK_PRE_AIO) {
+                               if (blkaddr == NULL_ADDR)
+                                       err = reserve_new_block(&dn);
+                       } else {
+                               err = __allocate_data_block(&dn);
                         }
-                       err = __allocate_data_block(&dn);
                         if (err)
-                               goto put_out;
+                               goto sync_out;
                         allocated = true;
                         map->m_flags = F2FS_MAP_NEW;
+                       blkaddr = dn.data_blkaddr;
                 } else {
+                       if (flag == F2FS_GET_BLOCK_FIEMAP &&
+                                               blkaddr == NULL_ADDR) {
+                               if (map->m_next_pgofs)
+                                       *map->m_next_pgofs = pgofs + 1;
+                       }
                         if (flag != F2FS_GET_BLOCK_FIEMAP ||
-                                               dn.data_blkaddr != NEW_ADDR) {
+                                               blkaddr != NEW_ADDR) {
                                 if (flag == F2FS_GET_BLOCK_BMAP)
                                         err = -ENOENT;
-                               goto put_out;
+                               goto sync_out;
                         }
-
-                       /*
-                        * preallocated unwritten block should be mapped
-                        * for fiemap.
-                        */
-                       if (dn.data_blkaddr == NEW_ADDR)
-                               map->m_flags = F2FS_MAP_UNWRITTEN;
                 }
         }
  
-       map->m_flags |= F2FS_MAP_MAPPED;
-       map->m_pblk = dn.data_blkaddr;
-       map->m_len = 1;
+       if (map->m_len == 0) {
+               /* preallocated unwritten block should be mapped for fiemap. */
+               if (blkaddr == NEW_ADDR)
+                       map->m_flags |= F2FS_MAP_UNWRITTEN;
+               map->m_flags |= F2FS_MAP_MAPPED;
+
+               map->m_pblk = blkaddr;
+               map->m_len = 1;
+       } else if ((map->m_pblk != NEW_ADDR &&
+                       blkaddr == (map->m_pblk + ofs)) ||
+                       (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
+                       flag == F2FS_GET_BLOCK_PRE_DIO ||
+                       flag == F2FS_GET_BLOCK_PRE_AIO) {
+               ofs++;
+               map->m_len++;
+       } else {
+               goto sync_out;
+       }
  
-       end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
         dn.ofs_in_node++;
         pgofs++;
  
-get_next:
-       if (map->m_len >= maxblocks)
-               goto sync_out;
+       if (map->m_len < maxblocks) {
+               if (dn.ofs_in_node < end_offset)
+                       goto next_block;
  
-       if (dn.ofs_in_node >= end_offset) {
                 if (allocated)
                         sync_inode_page(&dn);
-               allocated = false;
                 f2fs_put_dnode(&dn);
  
                 if (create) {
                         f2fs_unlock_op(sbi);
-                       f2fs_balance_fs(sbi, dn.node_changed);
-                       f2fs_lock_op(sbi);
-               }
-
-               set_new_dnode(&dn, inode, NULL, NULL, 0);
-               err = get_dnode_of_data(&dn, pgofs, mode);
-               if (err) {
-                       if (err == -ENOENT)
-                               err = 0;
-                       goto unlock_out;
+                       f2fs_balance_fs(sbi, allocated);
                 }
-
-               end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
-       }
-
-       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
-
-       if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
-               if (create) {
-                       if (unlikely(f2fs_cp_error(sbi))) {
-                               err = -EIO;
-                               goto sync_out;
-                       }
-                       err = __allocate_data_block(&dn);
-                       if (err)
-                               goto sync_out;
-                       allocated = true;
-                       map->m_flags |= F2FS_MAP_NEW;
-                       blkaddr = dn.data_blkaddr;
-               } else {
-                       /*
-                        * we only merge preallocated unwritten blocks
-                        * for fiemap.
-                        */
-                       if (flag != F2FS_GET_BLOCK_FIEMAP ||
-                                       blkaddr != NEW_ADDR)
-                               goto sync_out;
-               }
-       }
-
-       /* Give more consecutive addresses for the readahead */
-       if ((map->m_pblk != NEW_ADDR &&
-                       blkaddr == (map->m_pblk + ofs)) ||
-                       (map->m_pblk == NEW_ADDR &&
-                       blkaddr == NEW_ADDR)) {
-               ofs++;
-               dn.ofs_in_node++;
-               pgofs++;
-               map->m_len++;
-               goto get_next;
+               allocated = false;
+               goto next_dnode;
         }
  
  sync_out:
         if (allocated)
                 sync_inode_page(&dn);
-put_out:
         f2fs_put_dnode(&dn);
  unlock_out:
         if (create) {
                 f2fs_unlock_op(sbi);
-               f2fs_balance_fs(sbi, dn.node_changed);
+               f2fs_balance_fs(sbi, allocated);
         }
  out:
         trace_f2fs_map_blocks(inode, map, err);
@@ -723,13 +735,15 @@ out:
  }
  
  static int __get_data_block(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh, int create, int flag)
+                       struct buffer_head *bh, int create, int flag,
+                       pgoff_t *next_pgofs)
  {
         struct f2fs_map_blocks map;
         int ret;
  
         map.m_lblk = iblock;
         map.m_len = bh->b_size >> inode->i_blkbits;
+       map.m_next_pgofs = next_pgofs;
  
         ret = f2fs_map_blocks(inode, &map, create, flag);
         if (!ret) {
@@ -741,16 +755,18 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
  }
  
  static int get_data_block(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh_result, int create, int flag)
+                       struct buffer_head *bh_result, int create, int flag,
+                       pgoff_t *next_pgofs)
  {
-       return __get_data_block(inode, iblock, bh_result, create, flag);
+       return __get_data_block(inode, iblock, bh_result, create,
+                                                       flag, next_pgofs);
  }
  
  static int get_data_block_dio(struct inode *inode, sector_t iblock,
                         struct buffer_head *bh_result, int create)
  {
         return __get_data_block(inode, iblock, bh_result, create,
-                                               F2FS_GET_BLOCK_DIO);
+                                               F2FS_GET_BLOCK_DIO, NULL);
  }
  
  static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -761,7 +777,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock,
                 return -EFBIG;
  
         return __get_data_block(inode, iblock, bh_result, create,
-                                               F2FS_GET_BLOCK_BMAP);
+                                               F2FS_GET_BLOCK_BMAP, NULL);
  }
  
  static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -779,6 +795,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  {
         struct buffer_head map_bh;
         sector_t start_blk, last_blk;
+       pgoff_t next_pgofs;
         loff_t isize;
         u64 logical = 0, phys = 0, size = 0;
         u32 flags = 0;
@@ -814,14 +831,15 @@ next:
         map_bh.b_size = len;
  
         ret = get_data_block(inode, start_blk, &map_bh, 0,
-                                       F2FS_GET_BLOCK_FIEMAP);
+                                       F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
         if (ret)
                 goto out;
  
         /* HOLE */
         if (!buffer_mapped(&map_bh)) {
+               start_blk = next_pgofs;
                 /* Go through holes util pass the EOF */
-               if (blk_to_logical(inode, start_blk++) < isize)
+               if (blk_to_logical(inode, start_blk) < isize)
                         goto prep_next;
                 /* Found a hole beyond isize means no more extents.
                  * Note that the premise is that filesystems don't
@@ -889,6 +907,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
         map.m_lblk = 0;
         map.m_len = 0;
         map.m_flags = 0;
+       map.m_next_pgofs = NULL;
  
         for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
  
@@ -927,7 +946,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
                         map.m_len = last_block - block_in_file;
  
                         if (f2fs_map_blocks(inode, &map, 0,
-                                                       F2FS_GET_BLOCK_READ))
+                                               F2FS_GET_BLOCK_READ))
                                 goto set_error_page;
                 }
  got_it:
@@ -1177,12 +1196,18 @@ out:
         inode_dec_dirty_pages(inode);
         if (err)
                 ClearPageUptodate(page);
+
+       if (wbc->for_reclaim) {
+               f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE);
+               remove_dirty_inode(inode);
+       }
+
         unlock_page(page);
         f2fs_balance_fs(sbi, need_balance_fs);
-       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
+
+       if (unlikely(f2fs_cp_error(sbi)))
                 f2fs_submit_merged_bio(sbi, DATA, WRITE);
-               remove_dirty_inode(inode);
-       }
+
         return 0;
  
  redirty_out:
@@ -1282,7 +1307,8 @@ continue_unlock:
  
                         if (PageWriteback(page)) {
                                 if (wbc->sync_mode != WB_SYNC_NONE)
-                                       f2fs_wait_on_page_writeback(page, DATA);
+                                       f2fs_wait_on_page_writeback(page,
+                                                               DATA, true);
                                 else
                                         goto continue_unlock;
                         }
@@ -1339,8 +1365,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
         int ret;
         long diff;
  
-       trace_f2fs_writepages(mapping->host, wbc, DATA);
-
         /* deal with chardevs and other special file */
         if (!mapping->a_ops->writepage)
                 return 0;
@@ -1362,14 +1386,16 @@ static int f2fs_write_data_pages(struct address_space *mapping,
         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                 goto skip_write;
  
+       trace_f2fs_writepages(mapping->host, wbc, DATA);
+
         diff = nr_pages_to_write(sbi, DATA, wbc);
  
-       if (!S_ISDIR(inode->i_mode)) {
+       if (!S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_ALL) {
                 mutex_lock(&sbi->writepages);
                 locked = true;
         }
         ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
-       f2fs_submit_merged_bio(sbi, DATA, WRITE);
+       f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
         if (locked)
                 mutex_unlock(&sbi->writepages);
  
@@ -1380,6 +1406,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
  
  skip_write:
         wbc->pages_skipped += get_dirty_pages(inode);
+       trace_f2fs_writepages(mapping->host, wbc, DATA);
         return 0;
  }
  
@@ -1406,6 +1433,14 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
         struct extent_info ei;
         int err = 0;
  
+       /*
+        * we already allocated all the blocks, so we don't need to get
+        * the block addresses when there is no need to fill the page.
+        */
+       if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) &&
+                                       len == PAGE_CACHE_SIZE)
+               return 0;
+
         if (f2fs_has_inline_data(inode) ||
                         (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
                 f2fs_lock_op(sbi);
@@ -1425,7 +1460,7 @@ restart:
                 if (pos + len <= MAX_INLINE_DATA) {
                         read_inline_data(page, ipage);
                         set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
-                       sync_inode_page(&dn);
+                       set_inline_node(ipage);
                 } else {
                         err = f2fs_convert_inline_page(&dn, page);
                         if (err)
@@ -1439,13 +1474,9 @@ restart:
                 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
                         dn.data_blkaddr = ei.blk + index - ei.fofs;
                 } else {
-                       bool restart = false;
-
                         /* hole case */
                         err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
-                       if (err || (!err && dn.data_blkaddr == NULL_ADDR))
-                               restart = true;
-                       if (restart) {
+                       if (err || (!err && dn.data_blkaddr == NULL_ADDR)) {
                                 f2fs_put_dnode(&dn);
                                 f2fs_lock_op(sbi);
                                 locked = true;
@@ -1514,7 +1545,7 @@ repeat:
                 }
         }
  
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, false);
  
         /* wait for GCed encrypted page writeback */
         if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
@@ -1592,7 +1623,6 @@ static int f2fs_write_end(struct file *file,
         if (pos + copied > i_size_read(inode)) {
                 i_size_write(inode, pos + copied);
                 mark_inode_dirty(inode);
-               update_inode_page(inode);
         }
  
         f2fs_put_page(page, 1);
@@ -1617,34 +1647,21 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
  static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                               loff_t offset)
  {
-       struct file *file = iocb->ki_filp;
-       struct address_space *mapping = file->f_mapping;
+       struct address_space *mapping = iocb->ki_filp->f_mapping;
         struct inode *inode = mapping->host;
         size_t count = iov_iter_count(iter);
         int err;
  
-       /* we don't need to use inline_data strictly */
-       err = f2fs_convert_inline_inode(inode);
+       err = check_direct_IO(inode, iter, offset);
         if (err)
                 return err;
  
         if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                 return 0;
  
-       err = check_direct_IO(inode, iter, offset);
-       if (err)
-               return err;
-
         trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
  
-       if (iov_iter_rw(iter) == WRITE) {
-               err = __allocate_data_blocks(inode, offset, count);
-               if (err)
-                       goto out;
-       }
-
         err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
-out:
         if (err < 0 && iov_iter_rw(iter) == WRITE)
                 f2fs_write_failed(mapping, offset + count);
  
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c

index faa7495e2d7e62effef70b681c52ba2c75ca36d9..8950fc3cc2f7578520bfb340e80c7269b3dd8064 100644 (file)
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -296,7 +296,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
  {
         enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
         lock_page(page);
-       f2fs_wait_on_page_writeback(page, type);
+       f2fs_wait_on_page_writeback(page, type, true);
         de->ino = cpu_to_le32(inode->i_ino);
         set_de_type(de, inode->i_mode);
         f2fs_dentry_kunmap(dir, page);
@@ -311,7 +311,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
  {
         struct f2fs_inode *ri;
  
-       f2fs_wait_on_page_writeback(ipage, NODE);
+       f2fs_wait_on_page_writeback(ipage, NODE, true);
  
         /* copy name info. to this inode page */
         ri = F2FS_INODE(ipage);
@@ -598,7 +598,7 @@ start:
         ++level;
         goto start;
  add_dentry:
-       f2fs_wait_on_page_writeback(dentry_page, DATA);
+       f2fs_wait_on_page_writeback(dentry_page, DATA, true);
  
         if (inode) {
                 down_write(&F2FS_I(inode)->i_sem);
@@ -709,7 +709,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
                 return f2fs_delete_inline_entry(dentry, page, dir, inode);
  
         lock_page(page);
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
  
         dentry_blk = page_address(page);
         bit_pos = dentry - dentry_blk->dentry;
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c

index ccd5c636d3fe026d0c5379d4062ee204fc7faa17..071a1b19e5afb793e8eeea924af9ce32eca30a76 100644 (file)
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -33,6 +33,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
  
         en->ei = *ei;
         INIT_LIST_HEAD(&en->list);
+       en->et = et;
  
         rb_link_node(&en->rb_node, parent, p);
         rb_insert_color(&en->rb_node, &et->root);
@@ -50,6 +51,24 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
  
         if (et->cached_en == en)
                 et->cached_en = NULL;
+       kmem_cache_free(extent_node_slab, en);
+}
+
+/*
+ * Flow to release an extent_node:
+ * 1. list_del_init
+ * 2. __detach_extent_node
+ * 3. kmem_cache_free.
+ */
+static void __release_extent_node(struct f2fs_sb_info *sbi,
+                       struct extent_tree *et, struct extent_node *en)
+{
+       spin_lock(&sbi->extent_lock);
+       f2fs_bug_on(sbi, list_empty(&en->list));
+       list_del_init(&en->list);
+       spin_unlock(&sbi->extent_lock);
+
+       __detach_extent_node(sbi, et, en);
  }
  
  static struct extent_tree *__grab_extent_tree(struct inode *inode)
@@ -129,7 +148,7 @@ static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
  }
  
  static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
-                                       struct extent_tree *et, bool free_all)
+                                       struct extent_tree *et)
  {
         struct rb_node *node, *next;
         struct extent_node *en;
@@ -139,18 +158,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
         while (node) {
                 next = rb_next(node);
                 en = rb_entry(node, struct extent_node, rb_node);
-
-               if (free_all) {
-                       spin_lock(&sbi->extent_lock);
-                       if (!list_empty(&en->list))
-                               list_del_init(&en->list);
-                       spin_unlock(&sbi->extent_lock);
-               }
-
-               if (free_all || list_empty(&en->list)) {
-                       __detach_extent_node(sbi, et, en);
-                       kmem_cache_free(extent_node_slab, en);
-               }
+               __release_extent_node(sbi, et, en);
                 node = next;
         }
  
@@ -232,9 +240,10 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
         if (en) {
                 *ei = en->ei;
                 spin_lock(&sbi->extent_lock);
-               if (!list_empty(&en->list))
+               if (!list_empty(&en->list)) {
                         list_move_tail(&en->list, &sbi->extent_list);
-               et->cached_en = en;
+                       et->cached_en = en;
+               }
                 spin_unlock(&sbi->extent_lock);
                 ret = true;
         }
@@ -329,7 +338,6 @@ lookup_neighbors:
  
  static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
                                 struct extent_tree *et, struct extent_info *ei,
-                               struct extent_node **den,
                                 struct extent_node *prev_ex,
                                 struct extent_node *next_ex)
  {
@@ -342,20 +350,25 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
         }
  
         if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
-               if (en) {
-                       __detach_extent_node(sbi, et, prev_ex);
-                       *den = prev_ex;
-               }
+               if (en)
+                       __release_extent_node(sbi, et, prev_ex);
                 next_ex->ei.fofs = ei->fofs;
                 next_ex->ei.blk = ei->blk;
                 next_ex->ei.len += ei->len;
                 en = next_ex;
         }
  
-       if (en) {
-               __try_update_largest_extent(et, en);
+       if (!en)
+               return NULL;
+
+       __try_update_largest_extent(et, en);
+
+       spin_lock(&sbi->extent_lock);
+       if (!list_empty(&en->list)) {
+               list_move_tail(&en->list, &sbi->extent_list);
                 et->cached_en = en;
         }
+       spin_unlock(&sbi->extent_lock);
         return en;
  }
  
@@ -391,7 +404,12 @@ do_insert:
                 return NULL;
  
         __try_update_largest_extent(et, en);
+
+       /* update in global extent list */
+       spin_lock(&sbi->extent_lock);
+       list_add_tail(&en->list, &sbi->extent_list);
         et->cached_en = en;
+       spin_unlock(&sbi->extent_lock);
         return en;
  }
  
@@ -479,7 +497,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
                 if (parts)
                         __try_update_largest_extent(et, en);
                 else
-                       __detach_extent_node(sbi, et, en);
+                       __release_extent_node(sbi, et, en);
  
                 /*
                  * if original extent is split into zero or two parts, extent
@@ -490,31 +508,15 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
                         insert_p = NULL;
                         insert_parent = NULL;
                 }
-
-               /* update in global extent list */
-               spin_lock(&sbi->extent_lock);
-               if (!parts && !list_empty(&en->list))
-                       list_del(&en->list);
-               if (en1)
-                       list_add_tail(&en1->list, &sbi->extent_list);
-               spin_unlock(&sbi->extent_lock);
-
-               /* release extent node */
-               if (!parts)
-                       kmem_cache_free(extent_node_slab, en);
-
                 en = next_en;
         }
  
         /* 3. update extent in extent cache */
         if (blkaddr) {
-               struct extent_node *den = NULL;
  
                 set_extent_info(&ei, fofs, blkaddr, len);
-               en1 = __try_merge_extent_node(sbi, et, &ei, &den,
-                                                       prev_en, next_en);
-               if (!en1)
-                       en1 = __insert_extent_tree(sbi, et, &ei,
+               if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
+                       __insert_extent_tree(sbi, et, &ei,
                                                 insert_p, insert_parent);
  
                 /* give up extent_cache, if split and small updates happen */
@@ -524,24 +526,10 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
                         et->largest.len = 0;
                         set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
                 }
-
-               spin_lock(&sbi->extent_lock);
-               if (en1) {
-                       if (list_empty(&en1->list))
-                               list_add_tail(&en1->list, &sbi->extent_list);
-                       else
-                               list_move_tail(&en1->list, &sbi->extent_list);
-               }
-               if (den && !list_empty(&den->list))
-                       list_del(&den->list);
-               spin_unlock(&sbi->extent_lock);
-
-               if (den)
-                       kmem_cache_free(extent_node_slab, den);
         }
  
         if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
-               __free_extent_tree(sbi, et, true);
+               __free_extent_tree(sbi, et);
  
         write_unlock(&et->lock);
  
@@ -550,14 +538,10 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
  
  unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
  {
-       struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
         struct extent_tree *et, *next;
-       struct extent_node *en, *tmp;
-       unsigned long ino = F2FS_ROOT_INO(sbi);
-       unsigned int found;
+       struct extent_node *en;
         unsigned int node_cnt = 0, tree_cnt = 0;
         int remained;
-       bool do_free = false;
  
         if (!test_opt(sbi, EXTENT_CACHE))
                 return 0;
@@ -572,10 +556,10 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
         list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
                 if (atomic_read(&et->node_cnt)) {
                         write_lock(&et->lock);
-                       node_cnt += __free_extent_tree(sbi, et, true);
+                       node_cnt += __free_extent_tree(sbi, et);
                         write_unlock(&et->lock);
                 }
-
+               f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
                 list_del_init(&et->list);
                 radix_tree_delete(&sbi->extent_tree_root, et->ino);
                 kmem_cache_free(extent_tree_slab, et);
@@ -585,6 +569,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
  
                 if (node_cnt + tree_cnt >= nr_shrink)
                         goto unlock_out;
+               cond_resched();
         }
         up_write(&sbi->extent_tree_lock);
  
@@ -596,42 +581,29 @@ free_node:
         remained = nr_shrink - (node_cnt + tree_cnt);
  
         spin_lock(&sbi->extent_lock);
-       list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
-               if (!remained--)
+       for (; remained > 0; remained--) {
+               if (list_empty(&sbi->extent_list))
                         break;
-               list_del_init(&en->list);
-               do_free = true;
-       }
-       spin_unlock(&sbi->extent_lock);
-
-       if (do_free == false)
-               goto unlock_out;
-
-       /*
-        * reset ino for searching victims from beginning of global extent tree.
-        */
-       ino = F2FS_ROOT_INO(sbi);
-
-       while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
-                               (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
-               unsigned i;
-
-               ino = treevec[found - 1]->ino + 1;
-               for (i = 0; i < found; i++) {
-                       struct extent_tree *et = treevec[i];
+               en = list_first_entry(&sbi->extent_list,
+                                       struct extent_node, list);
+               et = en->et;
+               if (!write_trylock(&et->lock)) {
+                       /* refresh this extent node's position in extent list */
+                       list_move_tail(&en->list, &sbi->extent_list);
+                       continue;
+               }
  
-                       if (!atomic_read(&et->node_cnt))
-                               continue;
+               list_del_init(&en->list);
+               spin_unlock(&sbi->extent_lock);
  
-                       if (write_trylock(&et->lock)) {
-                               node_cnt += __free_extent_tree(sbi, et, false);
-                               write_unlock(&et->lock);
-                       }
+               __detach_extent_node(sbi, et, en);
  
-                       if (node_cnt + tree_cnt >= nr_shrink)
-                               goto unlock_out;
-               }
+               write_unlock(&et->lock);
+               node_cnt++;
+               spin_lock(&sbi->extent_lock);
         }
+       spin_unlock(&sbi->extent_lock);
+
  unlock_out:
         up_write(&sbi->extent_tree_lock);
  out:
@@ -650,7 +622,7 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
                 return 0;
  
         write_lock(&et->lock);
-       node_cnt = __free_extent_tree(sbi, et, true);
+       node_cnt = __free_extent_tree(sbi, et);
         write_unlock(&et->lock);
  
         return node_cnt;
@@ -701,7 +673,6 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
  
  void f2fs_update_extent_cache(struct dnode_of_data *dn)
  {
-       struct f2fs_inode_info *fi = F2FS_I(dn->inode);
         pgoff_t fofs;
  
         if (!f2fs_may_extent_tree(dn->inode))
@@ -710,8 +681,8 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn)
         f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
  
  
-       fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
-                                                       dn->ofs_in_node;
+       fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
+                                                               dn->ofs_in_node;
  
         if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1))
                 sync_inode_page(dn);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h

index ff79054c6cf6a5bfe254abaf203ae94acd44739b..f6a841b85d40a45bb720afa3a04b8e3a42373d90 100644 (file)
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -354,6 +354,7 @@ struct extent_node {
         struct rb_node rb_node;         /* rb node located in rb-tree */
         struct list_head list;          /* node in global extent list of sbi */
         struct extent_info ei;          /* extent info */
+       struct extent_tree *et;         /* extent tree pointer */
  };
  
  struct extent_tree {
@@ -382,6 +383,7 @@ struct f2fs_map_blocks {
         block_t m_lblk;
         unsigned int m_len;
         unsigned int m_flags;
+       pgoff_t *m_next_pgofs;          /* point next possible non-hole pgofs */
  };
  
  /* for flag in get_data_block */
@@ -389,6 +391,8 @@ struct f2fs_map_blocks {
  #define F2FS_GET_BLOCK_DIO             1
  #define F2FS_GET_BLOCK_FIEMAP          2
  #define F2FS_GET_BLOCK_BMAP            3
+#define F2FS_GET_BLOCK_PRE_DIO         4
+#define F2FS_GET_BLOCK_PRE_AIO         5
  
  /*
   * i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
@@ -515,6 +519,7 @@ struct f2fs_nm_info {
         nid_t next_scan_nid;            /* the next nid to be scanned */
         unsigned int ram_thresh;        /* control the memory footprint */
         unsigned int ra_nid_pages;      /* # of nid pages to be readaheaded */
+       unsigned int dirty_nats_ratio;  /* control dirty nats ratio threshold */
  
         /* NAT cache management */
         struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -549,6 +554,8 @@ struct dnode_of_data {
         unsigned int ofs_in_node;       /* data offset in the node page */
         bool inode_page_locked;         /* inode page is locked or not */
         bool node_changed;              /* is node block changed */
+       char cur_level;                 /* level of hole node page */
+       char max_level;                 /* level of current page located */
         block_t data_blkaddr;           /* block address of the node block */
  };
  
@@ -844,8 +851,19 @@ struct f2fs_sb_info {
         struct list_head s_list;
         struct mutex umount_mutex;
         unsigned int shrinker_run_no;
+
+       /* For write statistics */
+       u64 sectors_written_start;
+       u64 kbytes_written;
  };
  
+/* For write statistics. Suppose sector size is 512 bytes,
+ * and the return value is in kbytes. s is of struct f2fs_sb_info.
+ */
+#define BD_PART_WRITTEN(s)                                              \
+(((u64)part_stat_read(s->sb->s_bdev->bd_part, sectors[1]) -             \
+               s->sectors_written_start) >> 1)
+
  static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
  {
         sbi->last_time[type] = jiffies;
@@ -1525,9 +1543,9 @@ static inline int f2fs_has_inline_xattr(struct inode *inode)
         return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
  }
  
-static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
+static inline unsigned int addrs_per_inode(struct inode *inode)
  {
-       if (f2fs_has_inline_xattr(&fi->vfs_inode))
+       if (f2fs_has_inline_xattr(inode))
                 return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
         return DEF_ADDRS_PER_INODE;
  }
@@ -1681,10 +1699,10 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
          (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
  
  /* get offset of first page in next direct node */
-#define PGOFS_OF_NEXT_DNODE(pgofs, fi)                         \
-       ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) :  \
-       (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) /       \
-       ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+#define PGOFS_OF_NEXT_DNODE(pgofs, inode)                              \
+       ((pgofs < ADDRS_PER_INODE(inode)) ? ADDRS_PER_INODE(inode) :    \
+       (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) /    \
+       ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode))
  
  /*
   * file.c
@@ -1780,6 +1798,7 @@ int need_dentry_mark(struct f2fs_sb_info *, nid_t);
  bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
  bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
  void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
+pgoff_t get_next_page_offset(struct dnode_of_data *, pgoff_t);
  int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
  int truncate_inode_blocks(struct inode *, pgoff_t);
  int truncate_xattr_node(struct inode *, struct page *);
@@ -1836,7 +1855,7 @@ void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *,
                                 block_t, block_t, unsigned char, bool);
  void allocate_data_block(struct f2fs_sb_info *, struct page *,
                 block_t, block_t *, struct f2fs_summary *, int);
-void f2fs_wait_on_page_writeback(struct page *, enum page_type);
+void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
  void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t);
  void write_data_summaries(struct f2fs_sb_info *, block_t);
  void write_node_summaries(struct f2fs_sb_info *, block_t);
@@ -1881,11 +1900,14 @@ void destroy_checkpoint_caches(void);
   * data.c
   */
  void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
+void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
+                               struct page *, nid_t, enum page_type, int);
  int f2fs_submit_page_bio(struct f2fs_io_info *);
  void f2fs_submit_page_mbio(struct f2fs_io_info *);
  void set_data_blkaddr(struct dnode_of_data *);
  int reserve_new_block(struct dnode_of_data *);
  int f2fs_get_block(struct dnode_of_data *, pgoff_t);
+ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
  int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
  struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
  struct page *find_data_page(struct inode *, pgoff_t);
@@ -1902,7 +1924,7 @@ int f2fs_release_page(struct page *, gfp_t);
   */
  int start_gc_thread(struct f2fs_sb_info *);
  void stop_gc_thread(struct f2fs_sb_info *);
-block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
+block_t start_bidx_of_node(unsigned int, struct inode *);
  int f2fs_gc(struct f2fs_sb_info *, bool);
  void build_gc_manager(struct f2fs_sb_info *);
  
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c

index ea272be62677004d29c8018691ea442c2c992bb1..a4362d4d714b09f838bf46cad48d6f7aa62de2fd 100644 (file)
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -86,7 +86,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
         trace_f2fs_vm_page_mkwrite(page, DATA);
  mapped:
         /* fill the page */
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, false);
  
         /* wait for GCed encrypted page writeback */
         if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
@@ -358,15 +358,14 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
                 } else if (err == -ENOENT) {
                         /* direct node does not exists */
                         if (whence == SEEK_DATA) {
-                               pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
-                                                       F2FS_I(inode));
+                               pgofs = get_next_page_offset(&dn, pgofs);
                                 continue;
                         } else {
                                 goto found;
                         }
                 }
  
-               end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+               end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
  
                 /* find data/hole in dnode block */
                 for (; dn.ofs_in_node < end_offset;
@@ -480,7 +479,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
                  * we will invalidate all blkaddr in the whole range.
                  */
                 fofs = start_bidx_of_node(ofs_of_node(dn->node_page),
-                                               F2FS_I(dn->inode)) + ofs;
+                                                       dn->inode) + ofs;
                 f2fs_update_extent_cache_range(dn, fofs, 0, len);
                 dec_valid_block_count(sbi, dn->inode, nr_free);
                 sync_inode_page(dn);
@@ -521,7 +520,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
         if (IS_ERR(page))
                 return 0;
  truncate_out:
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
         zero_user(page, offset, PAGE_CACHE_SIZE - offset);
         if (!cache_only || !f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode))
                 set_page_dirty(page);
@@ -568,7 +567,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
                 goto out;
         }
  
-       count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+       count = ADDRS_PER_PAGE(dn.node_page, inode);
  
         count -= dn.ofs_in_node;
         f2fs_bug_on(sbi, count < 0);
@@ -743,7 +742,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
         if (IS_ERR(page))
                 return PTR_ERR(page);
  
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
         zero_user(page, start, len);
         set_page_dirty(page);
         f2fs_put_page(page, 1);
@@ -768,7 +767,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
                         return err;
                 }
  
-               end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+               end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
                 count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
  
                 f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
@@ -892,7 +891,7 @@ static int __exchange_data_block(struct inode *inode, pgoff_t src,
                 psrc = get_lock_data_page(inode, src, true);
                 if (IS_ERR(psrc))
                         return PTR_ERR(psrc);
-               pdst = get_new_data_page(inode, NULL, dst, false);
+               pdst = get_new_data_page(inode, NULL, dst, true);
                 if (IS_ERR(pdst)) {
                         f2fs_put_page(psrc, 1);
                         return PTR_ERR(pdst);
@@ -1648,7 +1647,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
                                         struct f2fs_defragment *range)
  {
         struct inode *inode = file_inode(filp);
-       struct f2fs_map_blocks map;
+       struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
         struct extent_info ei;
         pgoff_t pg_start, pg_end;
         unsigned int blk_per_seg = sbi->blocks_per_seg;
@@ -1874,14 +1873,32 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
  
  static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
  {
-       struct inode *inode = file_inode(iocb->ki_filp);
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       ssize_t ret;
  
         if (f2fs_encrypted_inode(inode) &&
                                 !f2fs_has_encryption_key(inode) &&
                                 f2fs_get_encryption_info(inode))
                 return -EACCES;
  
-       return generic_file_write_iter(iocb, from);
+       inode_lock(inode);
+       ret = generic_write_checks(iocb, from);
+       if (ret > 0) {
+               ret = f2fs_preallocate_blocks(iocb, from);
+               if (!ret)
+                       ret = __generic_file_write_iter(iocb, from);
+       }
+       inode_unlock(inode);
+
+       if (ret > 0) {
+               ssize_t err;
+
+               err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+               if (err < 0)
+                       ret = err;
+       }
+       return ret;
  }
  
  #ifdef CONFIG_COMPAT
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c

index f610c2a9bdde9561d3be71ab4d674376db8ec401..47ade3542fbdd679471dfc9927f6d2782034c45f 100644 (file)
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -245,6 +245,18 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
                 return get_cb_cost(sbi, segno);
  }
  
+static unsigned int count_bits(const unsigned long *addr,
+                               unsigned int offset, unsigned int len)
+{
+       unsigned int end = offset + len, sum = 0;
+
+       while (offset < end) {
+               if (test_bit(offset++, addr))
+                       ++sum;
+       }
+       return sum;
+}
+
  /*
   * This function is called from two paths.
   * One is garbage collection and the other is SSR segment selection.
@@ -260,7 +272,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
         struct victim_sel_policy p;
         unsigned int secno, max_cost;
         unsigned int last_segment = MAIN_SEGS(sbi);
-       int nsearched = 0;
+       unsigned int nsearched = 0;
  
         mutex_lock(&dirty_i->seglist_lock);
  
@@ -295,26 +307,31 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
                 }
  
                 p.offset = segno + p.ofs_unit;
-               if (p.ofs_unit > 1)
+               if (p.ofs_unit > 1) {
                         p.offset -= segno % p.ofs_unit;
+                       nsearched += count_bits(p.dirty_segmap,
+                                               p.offset - p.ofs_unit,
+                                               p.ofs_unit);
+               } else {
+                       nsearched++;
+               }
+
  
                 secno = GET_SECNO(sbi, segno);
  
                 if (sec_usage_check(sbi, secno))
-                       continue;
+                       goto next;
                 if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
-                       continue;
+                       goto next;
  
                 cost = get_gc_cost(sbi, segno, &p);
  
                 if (p.min_cost > cost) {
                         p.min_segno = segno;
                         p.min_cost = cost;
-               } else if (unlikely(cost == max_cost)) {
-                       continue;
                 }
-
-               if (nsearched++ >= p.max_search) {
+next:
+               if (nsearched >= p.max_search) {
                         sbi->last_victim[p.gc_mode] = segno;
                         break;
                 }
@@ -399,7 +416,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
   * On validity, copy that node with cold status, otherwise (invalid node)
   * ignore that.
   */
-static int gc_node_segment(struct f2fs_sb_info *sbi,
+static void gc_node_segment(struct f2fs_sb_info *sbi,
                 struct f2fs_summary *sum, unsigned int segno, int gc_type)
  {
         bool initial = true;
@@ -419,7 +436,7 @@ next_step:
  
                 /* stop BG_GC if there is not enough free sections. */
                 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
-                       return 0;
+                       return;
  
                 if (check_valid_map(sbi, segno, off) == 0)
                         continue;
@@ -446,7 +463,7 @@ next_step:
  
                 /* set page dirty and write it */
                 if (gc_type == FG_GC) {
-                       f2fs_wait_on_page_writeback(node_page, NODE);
+                       f2fs_wait_on_page_writeback(node_page, NODE, true);
                         set_page_dirty(node_page);
                 } else {
                         if (!PageWriteback(node_page))
@@ -460,20 +477,6 @@ next_step:
                 initial = false;
                 goto next_step;
         }
-
-       if (gc_type == FG_GC) {
-               struct writeback_control wbc = {
-                       .sync_mode = WB_SYNC_ALL,
-                       .nr_to_write = LONG_MAX,
-                       .for_reclaim = 0,
-               };
-               sync_node_pages(sbi, 0, &wbc);
-
-               /* return 1 only if FG_GC succefully reclaimed one */
-               if (get_valid_blocks(sbi, segno, 1) == 0)
-                       return 1;
-       }
-       return 0;
  }
  
  /*
@@ -483,7 +486,7 @@ next_step:
   * as indirect or double indirect node blocks, are given, it must be a caller's
   * bug.
   */
-block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
+block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
  {
         unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
         unsigned int bidx;
@@ -500,7 +503,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
                 int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
                 bidx = node_ofs - 5 - dec;
         }
-       return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
+       return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
  }
  
  static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -567,7 +570,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
          * don't cache encrypted data into meta inode until previous dirty
          * data were writebacked to avoid racing between GC and flush.
          */
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
  
         get_node_info(fio.sbi, dn.nid, &ni);
         set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
@@ -596,14 +599,14 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
                 goto put_page_out;
  
         set_page_dirty(fio.encrypted_page);
-       f2fs_wait_on_page_writeback(fio.encrypted_page, DATA);
+       f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
         if (clear_page_dirty_for_io(fio.encrypted_page))
                 dec_page_count(fio.sbi, F2FS_DIRTY_META);
  
         set_page_writeback(fio.encrypted_page);
  
         /* allocate block address */
-       f2fs_wait_on_page_writeback(dn.node_page, NODE);
+       f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
         allocate_data_block(fio.sbi, NULL, fio.blk_addr,
                                         &fio.blk_addr, &sum, CURSEG_COLD_DATA);
         fio.rw = WRITE_SYNC;
@@ -645,7 +648,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
                         .encrypted_page = NULL,
                 };
                 set_page_dirty(page);
-               f2fs_wait_on_page_writeback(page, DATA);
+               f2fs_wait_on_page_writeback(page, DATA, true);
                 if (clear_page_dirty_for_io(page))
                         inode_dec_dirty_pages(inode);
                 set_cold_data(page);
@@ -663,7 +666,7 @@ out:
   * If the parent node is not valid or the data block address is different,
   * the victim data block is ignored.
   */
-static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                 struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
  {
         struct super_block *sb = sbi->sb;
@@ -686,7 +689,7 @@ next_step:
  
                 /* stop BG_GC if there is not enough free sections. */
                 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
-                       return 0;
+                       return;
  
                 if (check_valid_map(sbi, segno, off) == 0)
                         continue;
@@ -719,7 +722,7 @@ next_step:
                                 continue;
                         }
  
-                       start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+                       start_bidx = start_bidx_of_node(nofs, inode);
                         data_page = get_read_data_page(inode,
                                         start_bidx + ofs_in_node, READA, true);
                         if (IS_ERR(data_page)) {
@@ -735,7 +738,7 @@ next_step:
                 /* phase 3 */
                 inode = find_gc_inode(gc_list, dni.ino);
                 if (inode) {
-                       start_bidx = start_bidx_of_node(nofs, F2FS_I(inode))
+                       start_bidx = start_bidx_of_node(nofs, inode)
                                                                 + ofs_in_node;
                         if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                                 move_encrypted_block(inode, start_bidx);
@@ -747,15 +750,6 @@ next_step:
  
         if (++phase < 4)
                 goto next_step;
-
-       if (gc_type == FG_GC) {
-               f2fs_submit_merged_bio(sbi, DATA, WRITE);
-
-               /* return 1 only if FG_GC succefully reclaimed one */
-               if (get_valid_blocks(sbi, segno, 1) == 0)
-                       return 1;
-       }
-       return 0;
  }
  
  static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -771,53 +765,90 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
         return ret;
  }
  
-static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
+static int do_garbage_collect(struct f2fs_sb_info *sbi,
+                               unsigned int start_segno,
                                 struct gc_inode_list *gc_list, int gc_type)
  {
         struct page *sum_page;
         struct f2fs_summary_block *sum;
         struct blk_plug plug;
-       int nfree = 0;
+       unsigned int segno = start_segno;
+       unsigned int end_segno = start_segno + sbi->segs_per_sec;
+       int seg_freed = 0;
+       unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
+                                               SUM_TYPE_DATA : SUM_TYPE_NODE;
  
-       /* read segment summary of victim */
-       sum_page = get_sum_page(sbi, segno);
+       /* readahead multiple SSA blocks that have contiguous addresses */
+       if (sbi->segs_per_sec > 1)
+               ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
+                                       sbi->segs_per_sec, META_SSA, true);
+
+       /* reference all summary pages */
+       while (segno < end_segno) {
+               sum_page = get_sum_page(sbi, segno++);
+               unlock_page(sum_page);
+       }
  
         blk_start_plug(&plug);
  
-       sum = page_address(sum_page);
+       for (segno = start_segno; segno < end_segno; segno++) {
+               /* find segment summary of victim */
+               sum_page = find_get_page(META_MAPPING(sbi),
+                                       GET_SUM_BLOCK(sbi, segno));
+               f2fs_bug_on(sbi, !PageUptodate(sum_page));
+               f2fs_put_page(sum_page, 0);
  
-       /*
-        * this is to avoid deadlock:
-        * - lock_page(sum_page)         - f2fs_replace_block
-        *  - check_valid_map()            - mutex_lock(sentry_lock)
-        *   - mutex_lock(sentry_lock)     - change_curseg()
-        *                                  - lock_page(sum_page)
-        */
-       unlock_page(sum_page);
-
-       switch (GET_SUM_TYPE((&sum->footer))) {
-       case SUM_TYPE_NODE:
-               nfree = gc_node_segment(sbi, sum->entries, segno, gc_type);
-               break;
-       case SUM_TYPE_DATA:
-               nfree = gc_data_segment(sbi, sum->entries, gc_list,
-                                                       segno, gc_type);
-               break;
+               sum = page_address(sum_page);
+               f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
+
+               /*
+                * summary pages are unlocked above to avoid deadlock:
+                * - lock_page(sum_page)         - f2fs_replace_block
+                *  - check_valid_map()            - mutex_lock(sentry_lock)
+                *   - mutex_lock(sentry_lock)     - change_curseg()
+                *                                  - lock_page(sum_page)
+                */
+
+               if (type == SUM_TYPE_NODE)
+                       gc_node_segment(sbi, sum->entries, segno, gc_type);
+               else
+                       gc_data_segment(sbi, sum->entries, gc_list, segno,
+                                                               gc_type);
+
+               stat_inc_seg_count(sbi, type, gc_type);
+               stat_inc_call_count(sbi->stat_info);
+
+               f2fs_put_page(sum_page, 0);
         }
-       blk_finish_plug(&plug);
  
-       stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
-       stat_inc_call_count(sbi->stat_info);
+       if (gc_type == FG_GC) {
+               if (type == SUM_TYPE_NODE) {
+                       struct writeback_control wbc = {
+                               .sync_mode = WB_SYNC_ALL,
+                               .nr_to_write = LONG_MAX,
+                               .for_reclaim = 0,
+                       };
+                       sync_node_pages(sbi, 0, &wbc);
+               } else {
+                       f2fs_submit_merged_bio(sbi, DATA, WRITE);
+               }
+       }
  
-       f2fs_put_page(sum_page, 0);
-       return nfree;
+       blk_finish_plug(&plug);
+
+       if (gc_type == FG_GC) {
+               while (start_segno < end_segno)
+                       if (get_valid_blocks(sbi, start_segno++, 1) == 0)
+                               seg_freed++;
+       }
+       return seg_freed;
  }
  
  int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
  {
-       unsigned int segno, i;
+       unsigned int segno;
         int gc_type = sync ? FG_GC : BG_GC;
-       int sec_freed = 0;
+       int sec_freed = 0, seg_freed;
         int ret = -EINVAL;
         struct cp_control cpc;
         struct gc_inode_list gc_list = {
@@ -838,30 +869,24 @@ gc_more:
  
         if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
                 gc_type = FG_GC;
+               /*
+                * If there is no victim and no prefree segment but still not
+                * enough free sections, we should flush dent/node blocks and do
+                * garbage collections.
+                */
                 if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
                         write_checkpoint(sbi, &cpc);
+               else if (has_not_enough_free_secs(sbi, 0))
+                       write_checkpoint(sbi, &cpc);
         }
  
         if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
                 goto stop;
         ret = 0;
  
-       /* readahead multi ssa blocks those have contiguous address */
-       if (sbi->segs_per_sec > 1)
-               ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
-                                                       META_SSA, true);
-
-       for (i = 0; i < sbi->segs_per_sec; i++) {
-               /*
-                * for FG_GC case, halt gcing left segments once failed one
-                * of segments in selected section to avoid long latency.
-                */
-               if (!do_garbage_collect(sbi, segno + i, &gc_list, gc_type) &&
-                               gc_type == FG_GC)
-                       break;
-       }
+       seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
  
-       if (i == sbi->segs_per_sec && gc_type == FG_GC)
+       if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
                 sec_freed++;
  
         if (gc_type == FG_GC)
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c

index c3f0b7d4cfca174bba6e6b4baa796640b767d2ec..0be4a9b400c63db0fecd285b3ad21fe665cb5f7f 100644 (file)
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -71,7 +71,7 @@ bool truncate_inline_inode(struct page *ipage, u64 from)
  
         addr = inline_data_addr(ipage);
  
-       f2fs_wait_on_page_writeback(ipage, NODE);
+       f2fs_wait_on_page_writeback(ipage, NODE, true);
         memset(addr + from, 0, MAX_INLINE_DATA - from);
  
         return true;
@@ -124,8 +124,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
         if (err)
                 return err;
  
-       f2fs_wait_on_page_writeback(page, DATA);
-
+       f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
         if (PageUptodate(page))
                 goto no_update;
  
@@ -150,7 +149,7 @@ no_update:
         write_data_page(dn, &fio);
         set_data_blkaddr(dn);
         f2fs_update_extent_cache(dn);
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
         if (dirty)
                 inode_dec_dirty_pages(dn->inode);
  
@@ -159,6 +158,7 @@ no_update:
  
         /* clear inline data and flag after data writeback */
         truncate_inline_inode(dn->inode_page, 0);
+       clear_inline_node(dn->inode_page);
  clear_out:
         stat_dec_inline_inode(dn->inode);
         f2fs_clear_inline_inode(dn->inode);
@@ -223,7 +223,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
  
         f2fs_bug_on(F2FS_I_SB(inode), page->index);
  
-       f2fs_wait_on_page_writeback(dn.inode_page, NODE);
+       f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
         src_addr = kmap_atomic(page);
         dst_addr = inline_data_addr(dn.inode_page);
         memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
@@ -233,6 +233,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
         set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
  
         sync_inode_page(&dn);
+       clear_inline_node(dn.inode_page);
         f2fs_put_dnode(&dn);
         return 0;
  }
@@ -261,7 +262,7 @@ process_inline:
                 ipage = get_node_page(sbi, inode->i_ino);
                 f2fs_bug_on(sbi, IS_ERR(ipage));
  
-               f2fs_wait_on_page_writeback(ipage, NODE);
+               f2fs_wait_on_page_writeback(ipage, NODE, true);
  
                 src_addr = inline_data_addr(npage);
                 dst_addr = inline_data_addr(ipage);
@@ -389,7 +390,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
         if (err)
                 goto out;
  
-       f2fs_wait_on_page_writeback(page, DATA);
+       f2fs_wait_on_page_writeback(page, DATA, true);
         zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
  
         dentry_blk = kmap_atomic(page);
@@ -469,7 +470,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
                 }
         }
  
-       f2fs_wait_on_page_writeback(ipage, NODE);
+       f2fs_wait_on_page_writeback(ipage, NODE, true);
  
         name_hash = f2fs_dentry_hash(name);
         make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
@@ -507,7 +508,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
         int i;
  
         lock_page(page);
-       f2fs_wait_on_page_writeback(page, NODE);
+       f2fs_wait_on_page_writeback(page, NODE, true);
  
         inline_dentry = inline_data_addr(page);
         bit_pos = dentry - inline_dentry->dentry;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c

index 2adeff26be11b9689dde24a5d22b0bd3351c21b7..60e3b3078b81469b300fca0d4bf44b2117f709bf 100644 (file)
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -83,7 +83,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
  
         while (start < end) {
                 if (*start++) {
-                       f2fs_wait_on_page_writeback(ipage, NODE);
+                       f2fs_wait_on_page_writeback(ipage, NODE, true);
  
                         set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
                         set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage));
@@ -227,7 +227,7 @@ int update_inode(struct inode *inode, struct page *node_page)
  {
         struct f2fs_inode *ri;
  
-       f2fs_wait_on_page_writeback(node_page, NODE);
+       f2fs_wait_on_page_writeback(node_page, NODE, true);
  
         ri = F2FS_INODE(node_page);
  
@@ -263,6 +263,10 @@ int update_inode(struct inode *inode, struct page *node_page)
         set_cold_node(inode, node_page);
         clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
  
+       /* deleted inode */
+       if (inode->i_nlink == 0)
+               clear_inline_node(node_page);
+
         return set_page_dirty(node_page);
  }
  
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c

index 342597a5897f059a2d31823923d8664861b7b969..150907ffa7aaf871772488c880d98ce3d86620c5 100644 (file)
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -403,14 +403,45 @@ cache:
         up_write(&nm_i->nat_tree_lock);
  }
  
+pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
+{
+       const long direct_index = ADDRS_PER_INODE(dn->inode);
+       const long direct_blks = ADDRS_PER_BLOCK;
+       const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+       unsigned int skipped_unit = ADDRS_PER_BLOCK;
+       int cur_level = dn->cur_level;
+       int max_level = dn->max_level;
+       pgoff_t base = 0;
+
+       if (!dn->max_level)
+               return pgofs + 1;
+
+       while (max_level-- > cur_level)
+               skipped_unit *= NIDS_PER_BLOCK;
+
+       switch (dn->max_level) {
+       case 3:
+               base += 2 * indirect_blks;
+       case 2:
+               base += 2 * direct_blks;
+       case 1:
+               base += direct_index;
+               break;
+       default:
+               f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
+       }
+
+       return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
+}
+
  /*
   * The maximum depth is four.
   * Offset[0] will have raw inode offset.
   */
-static int get_node_path(struct f2fs_inode_info *fi, long block,
+static int get_node_path(struct inode *inode, long block,
                                 int offset[4], unsigned int noffset[4])
  {
-       const long direct_index = ADDRS_PER_INODE(fi);
+       const long direct_index = ADDRS_PER_INODE(inode);
         const long direct_blks = ADDRS_PER_BLOCK;
         const long dptrs_per_blk = NIDS_PER_BLOCK;
         const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
@@ -495,10 +526,10 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
         int offset[4];
         unsigned int noffset[4];
         nid_t nids[4];
-       int level, i;
+       int level, i = 0;
         int err = 0;
  
-       level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
+       level = get_node_path(dn->inode, index, offset, noffset);
  
         nids[0] = dn->inode->i_ino;
         npage[0] = dn->inode_page;
@@ -585,6 +616,10 @@ release_pages:
  release_out:
         dn->inode_page = NULL;
         dn->node_page = NULL;
+       if (err == -ENOENT) {
+               dn->cur_level = i;
+               dn->max_level = level;
+       }
         return err;
  }
  
@@ -792,7 +827,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
  
         trace_f2fs_truncate_inode_blocks_enter(inode, from);
  
-       level = get_node_path(F2FS_I(inode), from, offset, noffset);
+       level = get_node_path(inode, from, offset, noffset);
  restart:
         page = get_node_page(sbi, inode->i_ino);
         if (IS_ERR(page)) {
@@ -861,7 +896,7 @@ skip_partial:
                                 f2fs_put_page(page, 1);
                                 goto restart;
                         }
-                       f2fs_wait_on_page_writeback(page, NODE);
+                       f2fs_wait_on_page_writeback(page, NODE, true);
                         ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
                         set_page_dirty(page);
                         unlock_page(page);
@@ -976,7 +1011,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
         new_ni.ino = dn->inode->i_ino;
         set_node_addr(sbi, &new_ni, NEW_ADDR, false);
  
-       f2fs_wait_on_page_writeback(page, NODE);
+       f2fs_wait_on_page_writeback(page, NODE, true);
         fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
         set_cold_node(dn->inode, page);
         SetPageUptodate(page);
@@ -1154,6 +1189,39 @@ void sync_inode_page(struct dnode_of_data *dn)
         dn->node_changed = ret ? true: false;
  }
  
+static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
+{
+       struct inode *inode;
+       struct page *page;
+
+       /* should flush inline_data before evict_inode */
+       inode = ilookup(sbi->sb, ino);
+       if (!inode)
+               return;
+
+       page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0);
+       if (!page)
+               goto iput_out;
+
+       if (!PageUptodate(page))
+               goto page_out;
+
+       if (!PageDirty(page))
+               goto page_out;
+
+       if (!clear_page_dirty_for_io(page))
+               goto page_out;
+
+       if (!f2fs_write_inline_data(inode, page))
+               inode_dec_dirty_pages(inode);
+       else
+               set_page_dirty(page);
+page_out:
+       f2fs_put_page(page, 1);
+iput_out:
+       iput(inode);
+}
+
  int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
                                         struct writeback_control *wbc)
  {
@@ -1221,6 +1289,17 @@ continue_unlock:
                                 goto continue_unlock;
                         }
  
+                       /* flush inline_data */
+                       if (!ino && is_inline_node(page)) {
+                               clear_inline_node(page);
+                               unlock_page(page);
+                               flush_inline_data(sbi, ino_of_node(page));
+                               continue;
+                       }
+
+                       f2fs_wait_on_page_writeback(page, NODE, true);
+
+                       BUG_ON(PageWriteback(page));
                         if (!clear_page_dirty_for_io(page))
                                 goto continue_unlock;
  
@@ -1258,8 +1337,13 @@ continue_unlock:
                 goto next_step;
         }
  
-       if (wrote)
-               f2fs_submit_merged_bio(sbi, NODE, WRITE);
+       if (wrote) {
+               if (ino)
+                       f2fs_submit_merged_bio_cond(sbi, NULL, NULL,
+                                                       ino, NODE, WRITE);
+               else
+                       f2fs_submit_merged_bio(sbi, NODE, WRITE);
+       }
         return nwritten;
  }
  
@@ -1287,7 +1371,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
                                 continue;
  
                         if (ino && ino_of_node(page) == ino) {
-                               f2fs_wait_on_page_writeback(page, NODE);
+                               f2fs_wait_on_page_writeback(page, NODE, true);
                                 if (TestClearPageError(page))
                                         ret = -EIO;
                         }
@@ -1326,8 +1410,6 @@ static int f2fs_write_node_page(struct page *page,
         if (unlikely(f2fs_cp_error(sbi)))
                 goto redirty_out;
  
-       f2fs_wait_on_page_writeback(page, NODE);
-
         /* get old block addr of this node page */
         nid = nid_of_node(page);
         f2fs_bug_on(sbi, page->index != nid);
@@ -1356,9 +1438,13 @@ static int f2fs_write_node_page(struct page *page,
         set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
         dec_page_count(sbi, F2FS_DIRTY_NODES);
         up_read(&sbi->node_write);
+
+       if (wbc->for_reclaim)
+               f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE);
+
         unlock_page(page);
  
-       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
+       if (unlikely(f2fs_cp_error(sbi)))
                 f2fs_submit_merged_bio(sbi, NODE, WRITE);
  
         return 0;
@@ -1374,8 +1460,6 @@ static int f2fs_write_node_pages(struct address_space *mapping,
         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
         long diff;
  
-       trace_f2fs_writepages(mapping->host, wbc, NODE);
-
         /* balancing f2fs's metadata in background */
         f2fs_balance_fs_bg(sbi);
  
@@ -1383,6 +1467,8 @@ static int f2fs_write_node_pages(struct address_space *mapping,
         if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
                 goto skip_write;
  
+       trace_f2fs_writepages(mapping->host, wbc, NODE);
+
         diff = nr_pages_to_write(sbi, NODE, wbc);
         wbc->sync_mode = WB_SYNC_NONE;
         sync_node_pages(sbi, 0, wbc);
@@ -1391,6 +1477,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
  
  skip_write:
         wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
+       trace_f2fs_writepages(mapping->host, wbc, NODE);
         return 0;
  }
  
@@ -1703,7 +1790,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
         src_addr = inline_xattr_addr(page);
         inline_size = inline_xattr_size(inode);
  
-       f2fs_wait_on_page_writeback(ipage, NODE);
+       f2fs_wait_on_page_writeback(ipage, NODE, true);
         memcpy(dst_addr, src_addr, inline_size);
  update_inode:
         update_inode(inode, ipage);
@@ -2000,6 +2087,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
         nm_i->nat_cnt = 0;
         nm_i->ram_thresh = DEF_RAM_THRESHOLD;
         nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
+       nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
  
         INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
         INIT_LIST_HEAD(&nm_i->free_nid_list);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h

index d4d1f636fe1c36c923ebcd68da46249161ccafc8..1f4f9d4569d9cb4f8a6ed93ff112fa300cd49444 100644 (file)
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -25,6 +25,9 @@
  /* control the memory footprint threshold (10MB per 1GB ram) */
  #define DEF_RAM_THRESHOLD      10
  
+/* control dirty nats ratio threshold (default: 10% of max nid count) */
+#define DEF_DIRTY_NAT_RATIO_THRESHOLD          10
+
  /* vector size for gang look-up from nat cache that consists of radix tree */
  #define NATVEC_SIZE    64
  #define SETVEC_SIZE    32
@@ -117,6 +120,12 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
         raw_ne->version = ni->version;
  }
  
+static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
+{
+       return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid *
+                                       NM_I(sbi)->dirty_nats_ratio / 100;
+}
+
  enum mem_type {
         FREE_NIDS,      /* indicates the free nid list */
         NAT_ENTRIES,    /* indicates the cached nat entry */
@@ -321,7 +330,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
  {
         struct f2fs_node *rn = F2FS_NODE(p);
  
-       f2fs_wait_on_page_writeback(p, NODE);
+       f2fs_wait_on_page_writeback(p, NODE, true);
  
         if (i)
                 rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
@@ -370,6 +379,21 @@ static inline int is_node(struct page *page, int type)
  #define is_fsync_dnode(page)   is_node(page, FSYNC_BIT_SHIFT)
  #define is_dent_dnode(page)    is_node(page, DENT_BIT_SHIFT)
  
+static inline int is_inline_node(struct page *page)
+{
+       return PageChecked(page);
+}
+
+static inline void set_inline_node(struct page *page)
+{
+       SetPageChecked(page);
+}
+
+static inline void clear_inline_node(struct page *page)
+{
+       ClearPageChecked(page);
+}
+
  static inline void set_cold_node(struct inode *inode, struct page *page)
  {
         struct f2fs_node *rn = F2FS_NODE(page);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c

index 589b20b8677b8cc1dcf3d16a6f7e0943ce0283eb..5045dd6a27e96c9a890e3e885ac01947c4914c53 100644 (file)
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -350,8 +350,7 @@ got_it:
                 inode = dn->inode;
         }
  
-       bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
-                       le16_to_cpu(sum.ofs_in_node);
+       bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
  
         /*
          * if inode page is locked, unlock temporarily, but its reference
@@ -386,10 +385,9 @@ truncate_out:
  static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
                                         struct page *page, block_t blkaddr)
  {
-       struct f2fs_inode_info *fi = F2FS_I(inode);
-       unsigned int start, end;
         struct dnode_of_data dn;
         struct node_info ni;
+       unsigned int start, end;
         int err = 0, recovered = 0;
  
         /* step 1: recover xattr */
@@ -409,8 +407,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
                 goto out;
  
         /* step 3: recover data indices */
-       start = start_bidx_of_node(ofs_of_node(page), fi);
-       end = start + ADDRS_PER_PAGE(page, fi);
+       start = start_bidx_of_node(ofs_of_node(page), inode);
+       end = start + ADDRS_PER_PAGE(page, inode);
  
         set_new_dnode(&dn, inode, NULL, NULL, 0);
  
@@ -418,7 +416,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
         if (err)
                 goto out;
  
-       f2fs_wait_on_page_writeback(dn.node_page, NODE);
+       f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
  
         get_node_info(sbi, dn.nid, &ni);
         f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c

index 5904a411c86fe8d4feb70f695809273b3a46bd6a..57a5f7bb275ae21b0935e1d8f89e83624bf06117 100644 (file)
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -223,7 +223,8 @@ int commit_inmem_pages(struct inode *inode, bool abort)
                 if (!abort) {
                         if (cur->page->mapping == inode->i_mapping) {
                                 set_page_dirty(cur->page);
-                               f2fs_wait_on_page_writeback(cur->page, DATA);
+                               f2fs_wait_on_page_writeback(cur->page, DATA,
+                                                                       true);
                                 if (clear_page_dirty_for_io(cur->page))
                                         inode_dec_dirty_pages(inode);
                                 trace_f2fs_commit_inmem_page(cur->page, INMEM);
@@ -253,7 +254,8 @@ int commit_inmem_pages(struct inode *inode, bool abort)
         if (!abort) {
                 f2fs_unlock_op(sbi);
                 if (submit_bio)
-                       f2fs_submit_merged_bio(sbi, DATA, WRITE);
+                       f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0,
+                                                               DATA, WRITE);
         }
         return err;
  }
@@ -291,8 +293,9 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
  
         /* checkpoint is the only way to shrink partial cached entries */
         if (!available_free_memory(sbi, NAT_ENTRIES) ||
-                       excess_prefree_segs(sbi) ||
                         !available_free_memory(sbi, INO_ENTRIES) ||
+                       excess_prefree_segs(sbi) ||
+                       excess_dirty_nats(sbi) ||
                         (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
                 if (test_opt(sbi, DATA_FLUSH))
                         sync_dirty_inodes(sbi, FILE_INODE);
@@ -873,9 +876,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
  
         if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
                 segno = find_next_zero_bit(free_i->free_segmap,
-                                       MAIN_SEGS(sbi), *newseg + 1);
-               if (segno - *newseg < sbi->segs_per_sec -
-                                       (*newseg % sbi->segs_per_sec))
+                               (hint + 1) * sbi->segs_per_sec, *newseg + 1);
+               if (segno < (hint + 1) * sbi->segs_per_sec)
                         goto got_it;
         }
  find_other_zone:
@@ -1415,53 +1417,17 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
         f2fs_update_extent_cache(dn);
  }
  
-static inline bool is_merged_page(struct f2fs_sb_info *sbi,
-                                       struct page *page, enum page_type type)
-{
-       enum page_type btype = PAGE_TYPE_OF_BIO(type);
-       struct f2fs_bio_info *io = &sbi->write_io[btype];
-       struct bio_vec *bvec;
-       struct page *target;
-       int i;
-
-       down_read(&io->io_rwsem);
-       if (!io->bio) {
-               up_read(&io->io_rwsem);
-               return false;
-       }
-
-       bio_for_each_segment_all(bvec, io->bio, i) {
-
-               if (bvec->bv_page->mapping) {
-                       target = bvec->bv_page;
-               } else {
-                       struct f2fs_crypto_ctx *ctx;
-
-                       /* encrypted page */
-                       ctx = (struct f2fs_crypto_ctx *)page_private(
-                                                               bvec->bv_page);
-                       target = ctx->w.control_page;
-               }
-
-               if (page == target) {
-                       up_read(&io->io_rwsem);
-                       return true;
-               }
-       }
-
-       up_read(&io->io_rwsem);
-       return false;
-}
-
  void f2fs_wait_on_page_writeback(struct page *page,
-                               enum page_type type)
+                               enum page_type type, bool ordered)
  {
         if (PageWriteback(page)) {
                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
  
-               if (is_merged_page(sbi, page, type))
-                       f2fs_submit_merged_bio(sbi, type, WRITE);
-               wait_on_page_writeback(page);
+               f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE);
+               if (ordered)
+                       wait_on_page_writeback(page);
+               else
+                       wait_for_stable_page(page);
         }
  }
  
@@ -1477,7 +1443,7 @@ void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
  
         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
         if (cpage) {
-               f2fs_wait_on_page_writeback(cpage, DATA);
+               f2fs_wait_on_page_writeback(cpage, DATA, true);
                 f2fs_put_page(cpage, 1);
         }
  }
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h

index ee44d346ea44143e5610c59c3d0ed66c88526c43..cd7111b9a664f542865e1272b25554443ac90c5a 100644 (file)
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -183,7 +183,7 @@ struct segment_allocation {
   * this value is set in page as a private data which indicate that
   * the page is atomically written, and it is in inmem_pages list.
   */
-#define ATOMIC_WRITTEN_PAGE            0x0000ffff
+#define ATOMIC_WRITTEN_PAGE            ((unsigned long)-1)
  
  #define IS_ATOMIC_WRITTEN_PAGE(page)                   \
                 (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c

index 6134832baaaf0b25a86ed9cec1e8121b3ba3e4e2..9445a34b8d48c184ecf0762b65dc2096896d2885 100644 (file)
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -126,6 +126,19 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
         return NULL;
  }
  
+static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
+               struct f2fs_sb_info *sbi, char *buf)
+{
+       struct super_block *sb = sbi->sb;
+
+       if (!sb->s_bdev->bd_part)
+               return snprintf(buf, PAGE_SIZE, "0\n");
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+               (unsigned long long)(sbi->kbytes_written +
+                       BD_PART_WRITTEN(sbi)));
+}
+
  static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
                         struct f2fs_sb_info *sbi, char *buf)
  {
@@ -204,6 +217,9 @@ static struct f2fs_attr f2fs_attr_##_name = {                       \
                 f2fs_sbi_show, f2fs_sbi_store,                  \
                 offsetof(struct struct_name, elname))
  
+#define F2FS_GENERAL_RO_ATTR(name) \
+static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
+
  F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
  F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
  F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
@@ -216,10 +232,12 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
  F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
  F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
  
  #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
  static struct attribute *f2fs_attrs[] = {
@@ -237,8 +255,10 @@ static struct attribute *f2fs_attrs[] = {
         ATTR_LIST(dir_level),
         ATTR_LIST(ram_thresh),
         ATTR_LIST(ra_nid_pages),
+       ATTR_LIST(dirty_nats_ratio),
         ATTR_LIST(cp_interval),
         ATTR_LIST(idle_interval),
+       ATTR_LIST(lifetime_write_kbytes),
         NULL,
  };
  
@@ -562,6 +582,13 @@ static void f2fs_put_super(struct super_block *sb)
         f2fs_leave_shrinker(sbi);
         mutex_unlock(&sbi->umount_mutex);
  
+       /* in our cp_error case, we can wait for any writeback page */
+       if (get_pages(sbi, F2FS_WRITEBACK)) {
+               f2fs_submit_merged_bio(sbi, DATA, WRITE);
+               f2fs_submit_merged_bio(sbi, NODE, WRITE);
+               f2fs_submit_merged_bio(sbi, META, WRITE);
+       }
+
         iput(sbi->node_inode);
         iput(sbi->meta_inode);
  
@@ -766,8 +793,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
         bool need_stop_gc = false;
         bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
  
-       sync_filesystem(sb);
-
         /*
          * Save the old mount options in case we
          * need to restore them.
@@ -775,6 +800,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
         org_mount_opt = sbi->mount_opt;
         active_logs = sbi->active_logs;
  
+       if (*flags & MS_RDONLY) {
+               set_opt(sbi, FASTBOOT);
+               set_sbi_flag(sbi, SBI_IS_DIRTY);
+       }
+
+       sync_filesystem(sb);
+
         sbi->mount_opt.opt = 0;
         default_options(sbi);
  
@@ -1242,6 +1274,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
         bool retry = true, need_fsck = false;
         char *options = NULL;
         int recovery, i, valid_super_block;
+       struct curseg_info *seg_i;
  
  try_onemore:
         err = -EINVAL;
@@ -1372,6 +1405,17 @@ try_onemore:
                 goto free_nm;
         }
  
+       /* For write statistics */
+       if (sb->s_bdev->bd_part)
+               sbi->sectors_written_start =
+                       (u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+
+       /* Read accumulated write IO statistics if exists */
+       seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
+       if (__exist_node_summaries(sbi))
+               sbi->kbytes_written =
+                       le64_to_cpu(seg_i->sum_blk->info.kbytes_written);
+
         build_gc_manager(sbi);
  
         /* get an inode for node space */
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c

index 10f1e784fa2390148aaa37f527526162be2d724d..06a72dc0191a022049bbf48ad26e127bb6211c02 100644 (file)
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -300,7 +300,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
  
                 if (ipage) {
                         inline_addr = inline_xattr_addr(ipage);
-                       f2fs_wait_on_page_writeback(ipage, NODE);
+                       f2fs_wait_on_page_writeback(ipage, NODE, true);
                 } else {
                         page = get_node_page(sbi, inode->i_ino);
                         if (IS_ERR(page)) {
@@ -308,7 +308,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
                                 return PTR_ERR(page);
                         }
                         inline_addr = inline_xattr_addr(page);
-                       f2fs_wait_on_page_writeback(page, NODE);
+                       f2fs_wait_on_page_writeback(page, NODE, true);
                 }
                 memcpy(inline_addr, txattr_addr, inline_size);
                 f2fs_put_page(page, 1);
@@ -329,7 +329,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
                         return PTR_ERR(xpage);
                 }
                 f2fs_bug_on(sbi, new_nid);
-               f2fs_wait_on_page_writeback(xpage, NODE);
+               f2fs_wait_on_page_writeback(xpage, NODE, true);
         } else {
                 struct dnode_of_data dn;
                 set_new_dnode(&dn, inode, NULL, NULL, new_nid);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h

index e59c3be921069635c90da255087b12978bf70c9a..f43e6a01a0236ed30674f778d48c6c78a8c2e938 100644 (file)
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -21,7 +21,7 @@
  #define F2FS_BLKSIZE                   4096    /* support only 4KB block */
  #define F2FS_BLKSIZE_BITS              12      /* bits for F2FS_BLKSIZE */
  #define F2FS_MAX_EXTENSION             64      /* # of extension entries */
-#define F2FS_BLK_ALIGN(x)      (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
+#define F2FS_BLK_ALIGN(x)      (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS)
  
  #define NULL_ADDR              ((block_t)0)    /* used as block_t addresses */
  #define NEW_ADDR               ((block_t)-1)   /* used as block_t addresses */
@@ -170,12 +170,12 @@ struct f2fs_extent {
  #define F2FS_INLINE_XATTR_ADDRS        50      /* 200 bytes for inline xattrs */
  #define DEF_ADDRS_PER_INODE    923     /* Address Pointers in an Inode */
  #define DEF_NIDS_PER_INODE     5       /* Node IDs in an Inode */
-#define ADDRS_PER_INODE(fi)    addrs_per_inode(fi)
+#define ADDRS_PER_INODE(inode) addrs_per_inode(inode)
  #define ADDRS_PER_BLOCK                1018    /* Address Pointers in a Direct Block */
  #define NIDS_PER_BLOCK         1018    /* Node IDs in an Indirect Block */
  
-#define ADDRS_PER_PAGE(page, fi)       \
-       (IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK)
+#define ADDRS_PER_PAGE(page, inode)    \
+       (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK)
  
  #define        NODE_DIR1_BLOCK         (DEF_ADDRS_PER_INODE + 1)
  #define        NODE_DIR2_BLOCK         (DEF_ADDRS_PER_INODE + 2)
@@ -345,7 +345,7 @@ struct f2fs_summary {
  
  struct summary_footer {
         unsigned char entry_type;       /* SUM_TYPE_XXX */
-       __u32 check_sum;                /* summary checksum */
+       __le32 check_sum;               /* summary checksum */
  } __packed;
  
  #define SUM_JOURNAL_SIZE       (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\
@@ -358,6 +358,12 @@ struct summary_footer {
                                 sizeof(struct sit_journal_entry))
  #define SIT_JOURNAL_RESERVED   ((SUM_JOURNAL_SIZE - 2) %\
                                 sizeof(struct sit_journal_entry))
+
+/* The reserved area pads f2fs_extra_info so that its size equals
+ * that of nat_journal and sit_journal.
+ */
+#define EXTRA_INFO_RESERVED    (SUM_JOURNAL_SIZE - 2 - 8)
+
  /*
   * frequently updated NAT/SIT entries can be stored in the spare area in
   * summary blocks
@@ -387,6 +393,11 @@ struct sit_journal {
         __u8 reserved[SIT_JOURNAL_RESERVED];
  } __packed;
  
+struct f2fs_extra_info {
+       __le64 kbytes_written;
+       __u8 reserved[EXTRA_INFO_RESERVED];
+} __packed;
+
  /* 4KB-sized summary block structure */
  struct f2fs_summary_block {
         struct f2fs_summary entries[ENTRIES_IN_SUM];
@@ -394,10 +405,11 @@ struct f2fs_summary_block {
                 __le16 n_nats;
                 __le16 n_sits;
         };
-       /* spare area is used by NAT or SIT journals */
+       /* spare area is used by NAT or SIT journals or extra info */
         union {
                 struct nat_journal nat_j;
                 struct sit_journal sit_j;
+               struct f2fs_extra_info info;
         };
         struct summary_footer footer;
  } __packed;
author	Stephen Rothwell <sfr@canb.auug.org.au>
	Thu, 11 Feb 2016 00:05:51 +0000 (11:05 +1100)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Thu, 11 Feb 2016 00:05:51 +0000 (11:05 +1100)
Documentation/ABI/testing/sysfs-fs-f2fs		patch \| blob \| history
fs/f2fs/checkpoint.c		patch \| blob \| history
fs/f2fs/data.c		patch \| blob \| history
fs/f2fs/dir.c		patch \| blob \| history
fs/f2fs/extent_cache.c		patch \| blob \| history
fs/f2fs/f2fs.h		patch \| blob \| history
fs/f2fs/file.c		patch \| blob \| history
fs/f2fs/gc.c		patch \| blob \| history
fs/f2fs/inline.c		patch \| blob \| history
fs/f2fs/inode.c		patch \| blob \| history
fs/f2fs/node.c		patch \| blob \| history
fs/f2fs/node.h		patch \| blob \| history
fs/f2fs/recovery.c		patch \| blob \| history
fs/f2fs/segment.c		patch \| blob \| history
fs/f2fs/segment.h		patch \| blob \| history
fs/f2fs/super.c		patch \| blob \| history
fs/f2fs/xattr.c		patch \| blob \| history
include/linux/f2fs_fs.h		patch \| blob \| history