]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/f2fs/segment.c
ipc/msg.c: use freezable blocking call
[karo-tx-linux.git] / fs / f2fs / segment.c
1 /*
2  * fs/f2fs/segment.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/prefetch.h>
16 #include <linux/kthread.h>
17 #include <linux/swap.h>
18 #include <linux/timer.h>
19
20 #include "f2fs.h"
21 #include "segment.h"
22 #include "node.h"
23 #include "trace.h"
24 #include <trace/events/f2fs.h>
25
/* find the first zero bit in reversed order by inverting and reusing
 * __reverse_ffs() */
#define __reverse_ffz(x) __reverse_ffs(~(x))

/* slab caches for discard extents, SIT entry sets and in-memory (atomic
 * write) page entries; presumably created by this file's init path, which
 * is not visible in this chunk */
static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;
31
32 static unsigned long __reverse_ulong(unsigned char *str)
33 {
34         unsigned long tmp = 0;
35         int shift = 24, idx = 0;
36
37 #if BITS_PER_LONG == 64
38         shift = 56;
39 #endif
40         while (shift >= 0) {
41                 tmp |= (unsigned long)str[idx++] << shift;
42                 shift -= BITS_PER_BYTE;
43         }
44         return tmp;
45 }
46
47 /*
48  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
49  * MSB and LSB are reversed in a byte by f2fs_set_bit.
50  */
51 static inline unsigned long __reverse_ffs(unsigned long word)
52 {
53         int num = 0;
54
55 #if BITS_PER_LONG == 64
56         if ((word & 0xffffffff00000000UL) == 0)
57                 num += 32;
58         else
59                 word >>= 32;
60 #endif
61         if ((word & 0xffff0000) == 0)
62                 num += 16;
63         else
64                 word >>= 16;
65
66         if ((word & 0xff00) == 0)
67                 num += 8;
68         else
69                 word >>= 8;
70
71         if ((word & 0xf0) == 0)
72                 num += 4;
73         else
74                 word >>= 4;
75
76         if ((word & 0xc) == 0)
77                 num += 2;
78         else
79                 word >>= 2;
80
81         if ((word & 0x2) == 0)
82                 num += 1;
83         return num;
84 }
85
/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be integral times of unsigned long.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	/* word containing the starting offset */
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;	/* returned unchanged when nothing found */
	unsigned long tmp;

	if (offset >= size)
		return size;

	/* from here on @size counts bits from the current word's bit 0, so
	 * (result - size) is the bitmap index of the current word's start */
	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		/* byte-reverse so bitmap bit i maps to CPU bit
		 * (BITS_PER_LONG - 1 - i) and plain shifts work */
		tmp = __reverse_ulong((unsigned char *)p);

		/* clear the (high-end) bits before @offset */
		tmp &= ~0UL >> offset;
		/* on the last word, clear bits past the end of the bitmap */
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}
130
/* Counterpart of __find_rev_next_bit: first ZERO bit at or after @offset. */
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	/* word containing the starting offset */
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;	/* returned unchanged when nothing found */
	unsigned long tmp;

	if (offset >= size)
		return size;

	/* @size now counts bits from the current word's bit 0;
	 * (result - size) is the bitmap index of the current word's start */
	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		/* a fully set word cannot contain a zero bit */
		if (*p == ~0UL)
			goto pass;

		/* byte-reverse so bitmap bit i maps to CPU bit
		 * (BITS_PER_LONG - 1 - i) */
		tmp = __reverse_ulong((unsigned char *)p);

		/* force bits before @offset to 1 so they are not reported */
		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		/* on the last word, force bits past the bitmap end to 1 */
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}
167
168 void register_inmem_page(struct inode *inode, struct page *page)
169 {
170         struct f2fs_inode_info *fi = F2FS_I(inode);
171         struct inmem_pages *new;
172
173         f2fs_trace_pid(page);
174
175         set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
176         SetPagePrivate(page);
177
178         new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
179
180         /* add atomic page indices to the list */
181         new->page = page;
182         INIT_LIST_HEAD(&new->list);
183
184         /* increase reference count with clean state */
185         mutex_lock(&fi->inmem_lock);
186         get_page(page);
187         list_add_tail(&new->list, &fi->inmem_pages);
188         inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
189         mutex_unlock(&fi->inmem_lock);
190
191         trace_f2fs_register_inmem_page(page, INMEM);
192 }
193
/*
 * Write back (or, with @abort, drop) every page queued by
 * register_inmem_page() on @inode.  Returns 0 or the first write error;
 * on error the loop stops, leaving the failed and later pages registered.
 */
int commit_inmem_pages(struct inode *inode, bool abort)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	bool submit_bio = false;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = DATA,
		.rw = WRITE_SYNC | REQ_PRIO,
		.encrypted_page = NULL,
	};
	int err = 0;

	/*
	 * The abort is true only when f2fs_evict_inode is called.
	 * Basically, the f2fs_evict_inode doesn't produce any data writes, so
	 * that we don't need to call f2fs_balance_fs.
	 * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
	 * inode becomes free by iget_locked in f2fs_iget.
	 */
	if (!abort) {
		f2fs_balance_fs(sbi, true);
		f2fs_lock_op(sbi);
	}

	mutex_lock(&fi->inmem_lock);
	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		lock_page(cur->page);
		if (!abort) {
			/* skip pages that were truncated/invalidated away */
			if (cur->page->mapping == inode->i_mapping) {
				set_page_dirty(cur->page);
				f2fs_wait_on_page_writeback(cur->page, DATA);
				if (clear_page_dirty_for_io(cur->page))
					inode_dec_dirty_pages(inode);
				trace_f2fs_commit_inmem_page(cur->page, INMEM);
				fio.page = cur->page;
				err = do_write_data_page(&fio);
				if (err) {
					/* stop committing; this page stays registered */
					unlock_page(cur->page);
					break;
				}
				clear_cold_data(cur->page);
				submit_bio = true;
			}
		} else {
			ClearPageUptodate(cur->page);
			trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
		}
		/* drop the atomic tag and the reference taken at register time */
		set_page_private(cur->page, 0);
		ClearPagePrivate(cur->page);
		f2fs_put_page(cur->page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	mutex_unlock(&fi->inmem_lock);

	if (!abort) {
		f2fs_unlock_op(sbi);
		if (submit_bio)
			f2fs_submit_merged_bio(sbi, DATA, WRITE);
	}
	return err;
}
260
261 /*
262  * This function balances dirty node and dentry pages.
263  * In addition, it controls garbage collection.
264  */
265 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
266 {
267         if (!need)
268                 return;
269         /*
270          * We should do GC or end up with checkpoint, if there are so many dirty
271          * dir/node pages without enough free segments.
272          */
273         if (has_not_enough_free_secs(sbi, 0)) {
274                 mutex_lock(&sbi->gc_mutex);
275                 f2fs_gc(sbi, false);
276         }
277 }
278
279 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
280 {
281         /* try to shrink extent cache when there is no enough memory */
282         if (!available_free_memory(sbi, EXTENT_CACHE))
283                 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
284
285         /* check the # of cached NAT entries */
286         if (!available_free_memory(sbi, NAT_ENTRIES))
287                 try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
288
289         if (!available_free_memory(sbi, FREE_NIDS))
290                 try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES);
291
292         /* checkpoint is the only way to shrink partial cached entries */
293         if (!available_free_memory(sbi, NAT_ENTRIES) ||
294                         excess_prefree_segs(sbi) ||
295                         !available_free_memory(sbi, INO_ENTRIES) ||
296                         (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
297                 if (test_opt(sbi, DATA_FLUSH))
298                         sync_dirty_inodes(sbi, FILE_INODE);
299                 f2fs_sync_fs(sbi->sb, true);
300                 stat_inc_bg_cp_count(sbi->stat_info);
301         }
302 }
303
304 static int issue_flush_thread(void *data)
305 {
306         struct f2fs_sb_info *sbi = data;
307         struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
308         wait_queue_head_t *q = &fcc->flush_wait_queue;
309 repeat:
310         if (kthread_should_stop())
311                 return 0;
312
313         if (!llist_empty(&fcc->issue_list)) {
314                 struct bio *bio;
315                 struct flush_cmd *cmd, *next;
316                 int ret;
317
318                 bio = f2fs_bio_alloc(0);
319
320                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
321                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
322
323                 bio->bi_bdev = sbi->sb->s_bdev;
324                 ret = submit_bio_wait(WRITE_FLUSH, bio);
325
326                 llist_for_each_entry_safe(cmd, next,
327                                           fcc->dispatch_list, llnode) {
328                         cmd->ret = ret;
329                         complete(&cmd->wait);
330                 }
331                 bio_put(bio);
332                 fcc->dispatch_list = NULL;
333         }
334
335         wait_event_interruptible(*q,
336                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
337         goto repeat;
338 }
339
340 int f2fs_issue_flush(struct f2fs_sb_info *sbi)
341 {
342         struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
343         struct flush_cmd cmd;
344
345         trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
346                                         test_opt(sbi, FLUSH_MERGE));
347
348         if (test_opt(sbi, NOBARRIER))
349                 return 0;
350
351         if (!test_opt(sbi, FLUSH_MERGE)) {
352                 struct bio *bio = f2fs_bio_alloc(0);
353                 int ret;
354
355                 bio->bi_bdev = sbi->sb->s_bdev;
356                 ret = submit_bio_wait(WRITE_FLUSH, bio);
357                 bio_put(bio);
358                 return ret;
359         }
360
361         init_completion(&cmd.wait);
362
363         llist_add(&cmd.llnode, &fcc->issue_list);
364
365         if (!fcc->dispatch_list)
366                 wake_up(&fcc->flush_wait_queue);
367
368         wait_for_completion(&cmd.wait);
369
370         return cmd.ret;
371 }
372
373 int create_flush_cmd_control(struct f2fs_sb_info *sbi)
374 {
375         dev_t dev = sbi->sb->s_bdev->bd_dev;
376         struct flush_cmd_control *fcc;
377         int err = 0;
378
379         fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
380         if (!fcc)
381                 return -ENOMEM;
382         init_waitqueue_head(&fcc->flush_wait_queue);
383         init_llist_head(&fcc->issue_list);
384         SM_I(sbi)->cmd_control_info = fcc;
385         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
386                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
387         if (IS_ERR(fcc->f2fs_issue_flush)) {
388                 err = PTR_ERR(fcc->f2fs_issue_flush);
389                 kfree(fcc);
390                 SM_I(sbi)->cmd_control_info = NULL;
391                 return err;
392         }
393
394         return err;
395 }
396
397 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
398 {
399         struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
400
401         if (fcc && fcc->f2fs_issue_flush)
402                 kthread_stop(fcc->f2fs_issue_flush);
403         kfree(fcc);
404         SM_I(sbi)->cmd_control_info = NULL;
405 }
406
407 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
408                 enum dirty_type dirty_type)
409 {
410         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
411
412         /* need not be added */
413         if (IS_CURSEG(sbi, segno))
414                 return;
415
416         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
417                 dirty_i->nr_dirty[dirty_type]++;
418
419         if (dirty_type == DIRTY) {
420                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
421                 enum dirty_type t = sentry->type;
422
423                 if (unlikely(t >= DIRTY)) {
424                         f2fs_bug_on(sbi, 1);
425                         return;
426                 }
427                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
428                         dirty_i->nr_dirty[t]++;
429         }
430 }
431
432 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
433                 enum dirty_type dirty_type)
434 {
435         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
436
437         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
438                 dirty_i->nr_dirty[dirty_type]--;
439
440         if (dirty_type == DIRTY) {
441                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
442                 enum dirty_type t = sentry->type;
443
444                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
445                         dirty_i->nr_dirty[t]--;
446
447                 if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
448                         clear_bit(GET_SECNO(sbi, segno),
449                                                 dirty_i->victim_secmap);
450         }
451 }
452
453 /*
454  * Should not occur error such as -ENOMEM.
455  * Adding dirty entry into seglist is not critical operation.
456  * If a given segment is one of current working segments, it won't be added.
457  */
458 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
459 {
460         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
461         unsigned short valid_blocks;
462
463         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
464                 return;
465
466         mutex_lock(&dirty_i->seglist_lock);
467
468         valid_blocks = get_valid_blocks(sbi, segno, 0);
469
470         if (valid_blocks == 0) {
471                 __locate_dirty_segment(sbi, segno, PRE);
472                 __remove_dirty_segment(sbi, segno, DIRTY);
473         } else if (valid_blocks < sbi->blocks_per_seg) {
474                 __locate_dirty_segment(sbi, segno, DIRTY);
475         } else {
476                 /* Recovery routine with SSR needs this */
477                 __remove_dirty_segment(sbi, segno, DIRTY);
478         }
479
480         mutex_unlock(&dirty_i->seglist_lock);
481 }
482
483 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
484                                 block_t blkstart, block_t blklen)
485 {
486         sector_t start = SECTOR_FROM_BLOCK(blkstart);
487         sector_t len = SECTOR_FROM_BLOCK(blklen);
488         struct seg_entry *se;
489         unsigned int offset;
490         block_t i;
491
492         for (i = blkstart; i < blkstart + blklen; i++) {
493                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
494                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
495
496                 if (!f2fs_test_and_set_bit(offset, se->discard_map))
497                         sbi->discard_blks--;
498         }
499         trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
500         return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
501 }
502
503 bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
504 {
505         int err = -ENOTSUPP;
506
507         if (test_opt(sbi, DISCARD)) {
508                 struct seg_entry *se = get_seg_entry(sbi,
509                                 GET_SEGNO(sbi, blkaddr));
510                 unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
511
512                 if (f2fs_test_bit(offset, se->discard_map))
513                         return false;
514
515                 err = f2fs_issue_discard(sbi, blkaddr, 1);
516         }
517
518         if (err) {
519                 update_meta_page(sbi, NULL, blkaddr);
520                 return true;
521         }
522         return false;
523 }
524
525 static void __add_discard_entry(struct f2fs_sb_info *sbi,
526                 struct cp_control *cpc, struct seg_entry *se,
527                 unsigned int start, unsigned int end)
528 {
529         struct list_head *head = &SM_I(sbi)->discard_list;
530         struct discard_entry *new, *last;
531
532         if (!list_empty(head)) {
533                 last = list_last_entry(head, struct discard_entry, list);
534                 if (START_BLOCK(sbi, cpc->trim_start) + start ==
535                                                 last->blkaddr + last->len) {
536                         last->len += end - start;
537                         goto done;
538                 }
539         }
540
541         new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
542         INIT_LIST_HEAD(&new->list);
543         new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
544         new->len = end - start;
545         list_add_tail(&new->list, head);
546 done:
547         SM_I(sbi)->nr_discards += end - start;
548 }
549
/*
 * Compute the discardable block ranges of segment cpc->trim_start and
 * queue them via __add_discard_entry().  With CP_DISCARD (trim) every
 * block that is neither checkpoint-valid nor already discarded is a
 * candidate; otherwise only blocks invalidated since the last checkpoint
 * are, and the run stops once max_discards is reached.
 */
static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	/* end wraps to -1 so the first search starts at bit 0 (end + 1) */
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason == CP_DISCARD);
	int i;

	/* a fully valid segment has nothing to discard */
	if (se->valid_blocks == max_blocks)
		return;

	if (!force) {
		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
			return;
	}

	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	/* emit each contiguous run of candidate bits as one discard entry */
	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
		__add_discard_entry(sbi, cpc, se, start, end);
	}
}
586
587 void release_discard_addrs(struct f2fs_sb_info *sbi)
588 {
589         struct list_head *head = &(SM_I(sbi)->discard_list);
590         struct discard_entry *entry, *this;
591
592         /* drop caches */
593         list_for_each_entry_safe(entry, this, head, list) {
594                 list_del(&entry->list);
595                 kmem_cache_free(discard_entry_slab, entry);
596         }
597 }
598
599 /*
600  * Should call clear_prefree_segments after checkpoint is done.
601  */
602 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
603 {
604         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
605         unsigned int segno;
606
607         mutex_lock(&dirty_i->seglist_lock);
608         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
609                 __set_test_and_free(sbi, segno);
610         mutex_unlock(&dirty_i->seglist_lock);
611 }
612
/*
 * Free the prefree segments collected by the last checkpoint: clear their
 * PRE bits (discarding whole runs when the DISCARD option is set), then
 * issue or drop the queued small discards depending on cpc->trim_minlen.
 */
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct list_head *head = &(SM_I(sbi)->discard_list);
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	/* end wraps to -1 so the first search starts at segment 0 (end + 1) */
	unsigned int start = 0, end = -1;

	mutex_lock(&dirty_i->seglist_lock);

	/* walk runs of consecutive PRE segments */
	while (1) {
		int i;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		for (i = start; i < end; i++)
			clear_bit(i, prefree_map);

		dirty_i->nr_dirty[PRE] -= end - start;

		if (!test_opt(sbi, DISCARD))
			continue;

		/* discard the whole segment run in one request */
		f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				(end - start) << sbi->log_blocks_per_seg);
	}
	mutex_unlock(&dirty_i->seglist_lock);

	/* send small discards */
	list_for_each_entry_safe(entry, this, head, list) {
		/* trim honors a minimum extent; shorter ones are dropped */
		if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen)
			goto skip;
		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
		cpc->trimmed += entry->len;
skip:
		list_del(&entry->list);
		SM_I(sbi)->nr_discards -= entry->len;
		kmem_cache_free(discard_entry_slab, entry);
	}
}
656
657 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
658 {
659         struct sit_info *sit_i = SIT_I(sbi);
660
661         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
662                 sit_i->dirty_sentries++;
663                 return false;
664         }
665
666         return true;
667 }
668
669 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
670                                         unsigned int segno, int modified)
671 {
672         struct seg_entry *se = get_seg_entry(sbi, segno);
673         se->type = type;
674         if (modified)
675                 __mark_sit_entry_dirty(sbi, segno);
676 }
677
/*
 * Apply a validity delta @del (+1/-1 at the call sites in this file) for
 * @blkaddr to its segment's SIT entry: adjust valid_blocks, the current
 * and discard bitmaps, the checkpoint-relative counter and the section
 * count.  Callers here run it under sit_i->sentry_lock (see
 * invalidate_blocks); refresh_sit_entry relies on its caller for that.
 */
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	/* new count must stay within [0, blocks_per_seg] */
	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	se->mtime = get_mtime(sbi);
	SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
	if (del > 0) {
		/* the block must not have been valid already */
		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
		/* a newly valid block is no longer discardable */
		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	} else {
		/* the block must have been valid before */
		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
		if (f2fs_test_and_clear_bit(offset, se->discard_map))
			sbi->discard_blks++;
	}
	/* blocks not valid in the checkpoint image track the delta */
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (sbi->segs_per_sec > 1)
		get_sec_entry(sbi, segno)->valid_blocks += del;
}
720
721 void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
722 {
723         update_sit_entry(sbi, new, 1);
724         if (GET_SEGNO(sbi, old) != NULL_SEGNO)
725                 update_sit_entry(sbi, old, -1);
726
727         locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
728         locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
729 }
730
731 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
732 {
733         unsigned int segno = GET_SEGNO(sbi, addr);
734         struct sit_info *sit_i = SIT_I(sbi);
735
736         f2fs_bug_on(sbi, addr == NULL_ADDR);
737         if (addr == NEW_ADDR)
738                 return;
739
740         /* add it into sit main buffer */
741         mutex_lock(&sit_i->sentry_lock);
742
743         update_sit_entry(sbi, addr, -1);
744
745         /* add it into dirty seglist */
746         locate_dirty_segment(sbi, segno);
747
748         mutex_unlock(&sit_i->sentry_lock);
749 }
750
751 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
752 {
753         struct sit_info *sit_i = SIT_I(sbi);
754         unsigned int segno, offset;
755         struct seg_entry *se;
756         bool is_cp = false;
757
758         if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
759                 return true;
760
761         mutex_lock(&sit_i->sentry_lock);
762
763         segno = GET_SEGNO(sbi, blkaddr);
764         se = get_seg_entry(sbi, segno);
765         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
766
767         if (f2fs_test_bit(offset, se->ckpt_valid_map))
768                 is_cp = true;
769
770         mutex_unlock(&sit_i->sentry_lock);
771
772         return is_cp;
773 }
774
775 /*
776  * This function should be resided under the curseg_mutex lock
777  */
778 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
779                                         struct f2fs_summary *sum)
780 {
781         struct curseg_info *curseg = CURSEG_I(sbi, type);
782         void *addr = curseg->sum_blk;
783         addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
784         memcpy(addr, sum, sizeof(struct f2fs_summary));
785 }
786
787 /*
788  * Calculate the number of current summary pages for writing
789  */
790 int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
791 {
792         int valid_sum_count = 0;
793         int i, sum_in_page;
794
795         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
796                 if (sbi->ckpt->alloc_type[i] == SSR)
797                         valid_sum_count += sbi->blocks_per_seg;
798                 else {
799                         if (for_ra)
800                                 valid_sum_count += le16_to_cpu(
801                                         F2FS_CKPT(sbi)->cur_data_blkoff[i]);
802                         else
803                                 valid_sum_count += curseg_blkoff(sbi, i);
804                 }
805         }
806
807         sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
808                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
809         if (valid_sum_count <= sum_in_page)
810                 return 1;
811         else if ((valid_sum_count - sum_in_page) <=
812                 (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
813                 return 2;
814         return 3;
815 }
816
/*
 * Caller should put this summary page
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	/* read the summary block of @segno from the meta area */
	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}
824
825 void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
826 {
827         struct page *page = grab_meta_page(sbi, blk_addr);
828         void *dst = page_address(page);
829
830         if (src)
831                 memcpy(dst, src, PAGE_CACHE_SIZE);
832         else
833                 memset(dst, 0, PAGE_CACHE_SIZE);
834         set_page_dirty(page);
835         f2fs_put_page(page, 1);
836 }
837
/* Persist the in-memory summary block @sum_blk at meta address @blk_addr. */
static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	update_meta_page(sbi, (void *)sum_blk, blk_addr);
}
843
844 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
845 {
846         struct curseg_info *curseg = CURSEG_I(sbi, type);
847         unsigned int segno = curseg->segno + 1;
848         struct free_segmap_info *free_i = FREE_I(sbi);
849
850         if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
851                 return !test_bit(segno, free_i->free_segmap);
852         return 0;
853 }
854
/*
 * Find a new segment from the free segments bitmap to right order
 * This function should be returned with success, otherwise BUG
 *
 * @newseg: in/out - the current segment on entry, the chosen free
 *          segment on return.
 * @new_sec: when true, force allocation from a brand-new section.
 * @dir: ALLOC_LEFT or ALLOC_RIGHT scan direction on the section map.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = *newseg / sbi->segs_per_sec;
	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	/* Fast path: try a free segment within the current section. */
	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
					MAIN_SEGS(sbi), *newseg + 1);
		if (segno - *newseg < sbi->segs_per_sec -
					(*newseg % sbi->segs_per_sec))
			goto got_it;
	}
find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			/* nothing to the right: wrap around to the start */
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	/* Scan leftwards; if the left side is full, wrap to a forward scan. */
	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	segno = secno * sbi->segs_per_sec;
	zoneno = secno / sbi->secs_per_zone;

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	/* Prefer a zone not already occupied by any current segment. */
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in user, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}
947
948 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
949 {
950         struct curseg_info *curseg = CURSEG_I(sbi, type);
951         struct summary_footer *sum_footer;
952
953         curseg->segno = curseg->next_segno;
954         curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
955         curseg->next_blkoff = 0;
956         curseg->next_segno = NULL_SEGNO;
957
958         sum_footer = &(curseg->sum_blk->footer);
959         memset(sum_footer, 0, sizeof(struct summary_footer));
960         if (IS_DATASEG(type))
961                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
962         if (IS_NODESEG(type))
963                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
964         __set_sit_entry_type(sbi, type, curseg->segno, modified);
965 }
966
967 /*
968  * Allocate a current working segment.
969  * This function always allocates a free segment in LFS manner.
970  */
971 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
972 {
973         struct curseg_info *curseg = CURSEG_I(sbi, type);
974         unsigned int segno = curseg->segno;
975         int dir = ALLOC_LEFT;
976
977         write_sum_page(sbi, curseg->sum_blk,
978                                 GET_SUM_BLOCK(sbi, segno));
979         if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
980                 dir = ALLOC_RIGHT;
981
982         if (test_opt(sbi, NOHEAP))
983                 dir = ALLOC_RIGHT;
984
985         get_new_segment(sbi, &segno, new_sec, dir);
986         curseg->next_segno = segno;
987         reset_curseg(sbi, type, 1);
988         curseg->alloc_type = LFS;
989 }
990
991 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
992                         struct curseg_info *seg, block_t start)
993 {
994         struct seg_entry *se = get_seg_entry(sbi, seg->segno);
995         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
996         unsigned long *target_map = SIT_I(sbi)->tmp_map;
997         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
998         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
999         int i, pos;
1000
1001         for (i = 0; i < entries; i++)
1002                 target_map[i] = ckpt_map[i] | cur_map[i];
1003
1004         pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
1005
1006         seg->next_blkoff = pos;
1007 }
1008
1009 /*
1010  * If a segment is written by LFS manner, next block offset is just obtained
1011  * by increasing the current block offset. However, if a segment is written by
1012  * SSR manner, next block offset obtained by calling __next_free_blkoff
1013  */
1014 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
1015                                 struct curseg_info *seg)
1016 {
1017         if (seg->alloc_type == SSR)
1018                 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
1019         else
1020                 seg->next_blkoff++;
1021 }
1022
/*
 * This function always allocates a used segment(from dirty seglist) by SSR
 * manner, so it should recover the existing segment information of valid blocks
 *
 * When @reuse is set, the on-disk summary block of the reused segment is
 * read back into the curseg so existing entries are preserved.
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	/* Persist the summary of the segment being switched away from. */
	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	/* The reused segment is in use now; drop its dirty-list state. */
	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	/* Position next_blkoff at the segment's first free block. */
	__next_free_blkoff(sbi, curseg, 0);

	if (reuse) {
		sum_page = get_sum_page(sbi, new_segno);
		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
		f2fs_put_page(sum_page, 1);
	}
}
1055
1056 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
1057 {
1058         struct curseg_info *curseg = CURSEG_I(sbi, type);
1059         const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
1060
1061         if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
1062                 return v_ops->get_victim(sbi,
1063                                 &(curseg)->next_segno, BG_GC, type, SSR);
1064
1065         /* For data segments, let's do SSR more intensively */
1066         for (; type >= CURSEG_HOT_DATA; type--)
1067                 if (v_ops->get_victim(sbi, &(curseg)->next_segno,
1068                                                 BG_GC, type, SSR))
1069                         return 1;
1070         return 0;
1071 }
1072
/*
 * flush out current segment and replace it with new segment
 * This function should be returned with success, otherwise BUG
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (force)
		/* caller demands a fresh section */
		new_curseg(sbi, type, true);
	else if (type == CURSEG_WARM_NODE)
		/* warm node logs always get a fresh segment */
		new_curseg(sbi, type, false);
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
		/* adjacent segment is free: stay in LFS mode */
		new_curseg(sbi, type, false);
	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
		/* space is tight: reuse a dirty segment via SSR */
		change_curseg(sbi, type, true);
	else
		new_curseg(sbi, type, false);

	stat_inc_seg_type(sbi, curseg);
}
1095
1096 static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
1097 {
1098         struct curseg_info *curseg = CURSEG_I(sbi, type);
1099         unsigned int old_segno;
1100
1101         old_segno = curseg->segno;
1102         SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
1103         locate_dirty_segment(sbi, old_segno);
1104 }
1105
1106 void allocate_new_segments(struct f2fs_sb_info *sbi)
1107 {
1108         int i;
1109
1110         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
1111                 __allocate_new_segments(sbi, i);
1112 }
1113
/* Default segment allocator: LFS first, SSR fallback (see
 * allocate_segment_by_default). */
static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};
1117
/*
 * FITRIM entry point: issue discards over the byte range in @range by
 * running batched checkpoints with CP_DISCARD.  On return, range->len
 * holds the number of bytes actually trimmed.
 */
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
	unsigned int start_segno, end_segno;
	struct cp_control cpc;
	int err = 0;

	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
		return -EINVAL;

	cpc.trimmed = 0;
	/* Ranges entirely inside the metadata area have nothing to trim. */
	if (end <= MAIN_BLKADDR(sbi))
		goto out;

	/* start/end segment number in main_area */
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
	cpc.reason = CP_DISCARD;
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));

	/* do checkpoint to issue discard commands safely */
	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
		cpc.trim_start = start_segno;

		/* Trim in section-aligned batches to bound the work done
		 * by each checkpoint. */
		if (sbi->discard_blks == 0)
			break;
		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
			cpc.trim_end = end_segno;
		else
			cpc.trim_end = min_t(unsigned int,
				rounddown(start_segno +
				BATCHED_TRIM_SEGMENTS(sbi),
				sbi->segs_per_sec) - 1, end_segno);

		mutex_lock(&sbi->gc_mutex);
		err = write_checkpoint(sbi, &cpc);
		mutex_unlock(&sbi->gc_mutex);
	}
out:
	/* Report back how many bytes were actually trimmed. */
	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
	return err;
}
1162
1163 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
1164 {
1165         struct curseg_info *curseg = CURSEG_I(sbi, type);
1166         if (curseg->next_blkoff < sbi->blocks_per_seg)
1167                 return true;
1168         return false;
1169 }
1170
1171 static int __get_segment_type_2(struct page *page, enum page_type p_type)
1172 {
1173         if (p_type == DATA)
1174                 return CURSEG_HOT_DATA;
1175         else
1176                 return CURSEG_HOT_NODE;
1177 }
1178
1179 static int __get_segment_type_4(struct page *page, enum page_type p_type)
1180 {
1181         if (p_type == DATA) {
1182                 struct inode *inode = page->mapping->host;
1183
1184                 if (S_ISDIR(inode->i_mode))
1185                         return CURSEG_HOT_DATA;
1186                 else
1187                         return CURSEG_COLD_DATA;
1188         } else {
1189                 if (IS_DNODE(page) && is_cold_node(page))
1190                         return CURSEG_WARM_NODE;
1191                 else
1192                         return CURSEG_COLD_NODE;
1193         }
1194 }
1195
1196 static int __get_segment_type_6(struct page *page, enum page_type p_type)
1197 {
1198         if (p_type == DATA) {
1199                 struct inode *inode = page->mapping->host;
1200
1201                 if (S_ISDIR(inode->i_mode))
1202                         return CURSEG_HOT_DATA;
1203                 else if (is_cold_data(page) || file_is_cold(inode))
1204                         return CURSEG_COLD_DATA;
1205                 else
1206                         return CURSEG_WARM_DATA;
1207         } else {
1208                 if (IS_DNODE(page))
1209                         return is_cold_node(page) ? CURSEG_WARM_NODE :
1210                                                 CURSEG_HOT_NODE;
1211                 else
1212                         return CURSEG_COLD_NODE;
1213         }
1214 }
1215
1216 static int __get_segment_type(struct page *page, enum page_type p_type)
1217 {
1218         switch (F2FS_P_SB(page)->active_logs) {
1219         case 2:
1220                 return __get_segment_type_2(page, p_type);
1221         case 4:
1222                 return __get_segment_type_4(page, p_type);
1223         }
1224         /* NR_CURSEG_TYPE(6) logs by default */
1225         f2fs_bug_on(F2FS_P_SB(page),
1226                 F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
1227         return __get_segment_type_6(page, p_type);
1228 }
1229
1230 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1231                 block_t old_blkaddr, block_t *new_blkaddr,
1232                 struct f2fs_summary *sum, int type)
1233 {
1234         struct sit_info *sit_i = SIT_I(sbi);
1235         struct curseg_info *curseg;
1236         bool direct_io = (type == CURSEG_DIRECT_IO);
1237
1238         type = direct_io ? CURSEG_WARM_DATA : type;
1239
1240         curseg = CURSEG_I(sbi, type);
1241
1242         mutex_lock(&curseg->curseg_mutex);
1243         mutex_lock(&sit_i->sentry_lock);
1244
1245         /* direct_io'ed data is aligned to the segment for better performance */
1246         if (direct_io && curseg->next_blkoff &&
1247                                 !has_not_enough_free_secs(sbi, 0))
1248                 __allocate_new_segments(sbi, type);
1249
1250         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1251
1252         /*
1253          * __add_sum_entry should be resided under the curseg_mutex
1254          * because, this function updates a summary entry in the
1255          * current summary block.
1256          */
1257         __add_sum_entry(sbi, type, sum);
1258
1259         __refresh_next_blkoff(sbi, curseg);
1260
1261         stat_inc_block_count(sbi, curseg);
1262
1263         if (!__has_curseg_space(sbi, type))
1264                 sit_i->s_ops->allocate_segment(sbi, type, false);
1265         /*
1266          * SIT information should be updated before segment allocation,
1267          * since SSR needs latest valid block information.
1268          */
1269         refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1270
1271         mutex_unlock(&sit_i->sentry_lock);
1272
1273         if (page && IS_NODESEG(type))
1274                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
1275
1276         mutex_unlock(&curseg->curseg_mutex);
1277 }
1278
1279 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
1280 {
1281         int type = __get_segment_type(fio->page, fio->type);
1282
1283         allocate_data_block(fio->sbi, fio->page, fio->blk_addr,
1284                                         &fio->blk_addr, sum, type);
1285
1286         /* writeout dirty page into bdev */
1287         f2fs_submit_page_mbio(fio);
1288 }
1289
/*
 * Write back one meta page.  Meta pages are indexed by block address,
 * so page->index is used directly as the target block.
 */
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.rw = WRITE_SYNC | REQ_META | REQ_PRIO,
		.blk_addr = page->index,
		.page = page,
		.encrypted_page = NULL,
	};

	/* Pages beyond the meta area are not real metadata; drop REQ_META. */
	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
		fio.rw &= ~REQ_META;

	set_page_writeback(page);
	f2fs_submit_page_mbio(&fio);
}
1307
1308 void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
1309 {
1310         struct f2fs_summary sum;
1311
1312         set_summary(&sum, nid, 0, 0);
1313         do_write_page(&sum, fio);
1314 }
1315
1316 void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
1317 {
1318         struct f2fs_sb_info *sbi = fio->sbi;
1319         struct f2fs_summary sum;
1320         struct node_info ni;
1321
1322         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1323         get_node_info(sbi, dn->nid, &ni);
1324         set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1325         do_write_page(&sum, fio);
1326         dn->data_blkaddr = fio->blk_addr;
1327 }
1328
1329 void rewrite_data_page(struct f2fs_io_info *fio)
1330 {
1331         stat_inc_inplace_blocks(fio->sbi);
1332         f2fs_submit_page_mbio(fio);
1333 }
1334
/*
 * Move the block summarized by @sum from @old_blkaddr to @new_blkaddr,
 * fixing up SIT entries and dirty-segment state.  The target curseg is
 * temporarily pointed at the new block's segment; when @recover_curseg
 * is set, the curseg position is restored afterwards.
 */
static void __f2fs_replace_block(struct f2fs_sb_info *sbi,
				struct f2fs_summary *sum,
				block_t old_blkaddr, block_t new_blkaddr,
				bool recover_curseg)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;
	unsigned short old_blkoff;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	if (!recover_curseg) {
		/* for recovery flow */
		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
			if (old_blkaddr == NULL_ADDR)
				type = CURSEG_COLD_DATA;
			else
				type = CURSEG_WARM_DATA;
		}
	} else {
		if (!IS_CURSEG(sbi, segno))
			type = CURSEG_WARM_DATA;
	}

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	/* Remember the curseg position so it can be restored below. */
	old_cursegno = curseg->segno;
	old_blkoff = curseg->next_blkoff;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type, true);
	}

	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
	__add_sum_entry(sbi, type, sum);

	if (!recover_curseg)
		update_sit_entry(sbi, new_blkaddr, 1);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		update_sit_entry(sbi, old_blkaddr, -1);

	/* Re-evaluate dirty state of every segment touched above. */
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));

	locate_dirty_segment(sbi, old_cursegno);

	if (recover_curseg) {
		if (old_cursegno != curseg->segno) {
			curseg->next_segno = old_cursegno;
			change_curseg(sbi, type, true);
		}
		curseg->next_blkoff = old_blkoff;
	}

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
}
1402
1403 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
1404                                 block_t old_addr, block_t new_addr,
1405                                 unsigned char version, bool recover_curseg)
1406 {
1407         struct f2fs_summary sum;
1408
1409         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
1410
1411         __f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg);
1412
1413         dn->data_blkaddr = new_addr;
1414         set_data_blkaddr(dn);
1415         f2fs_update_extent_cache(dn);
1416 }
1417
/*
 * Check whether @page is held in the not-yet-submitted merged write
 * bio for @type.
 */
static inline bool is_merged_page(struct f2fs_sb_info *sbi,
					struct page *page, enum page_type type)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = &sbi->write_io[btype];
	struct bio_vec *bvec;
	struct page *target;
	int i;

	down_read(&io->io_rwsem);
	if (!io->bio) {
		up_read(&io->io_rwsem);
		return false;
	}

	bio_for_each_segment_all(bvec, io->bio, i) {

		if (bvec->bv_page->mapping) {
			target = bvec->bv_page;
		} else {
			struct f2fs_crypto_ctx *ctx;

			/* encrypted page */
			ctx = (struct f2fs_crypto_ctx *)page_private(
								bvec->bv_page);
			/* The bio carries the ciphertext copy; compare
			 * against the plaintext page it controls. */
			target = ctx->w.control_page;
		}

		if (page == target) {
			up_read(&io->io_rwsem);
			return true;
		}
	}

	up_read(&io->io_rwsem);
	return false;
}
1455
1456 void f2fs_wait_on_page_writeback(struct page *page,
1457                                 enum page_type type)
1458 {
1459         if (PageWriteback(page)) {
1460                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1461
1462                 if (is_merged_page(sbi, page, type))
1463                         f2fs_submit_merged_bio(sbi, type, WRITE);
1464                 wait_on_page_writeback(page);
1465         }
1466 }
1467
1468 void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
1469                                                         block_t blkaddr)
1470 {
1471         struct page *cpage;
1472
1473         if (blkaddr == NEW_ADDR)
1474                 return;
1475
1476         f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
1477
1478         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
1479         if (cpage) {
1480                 f2fs_wait_on_page_writeback(cpage, DATA);
1481                 f2fs_put_page(cpage, 1);
1482         }
1483 }
1484
/*
 * Restore the NAT/SIT journals and the three data-log summaries from
 * the compacted summary area written at the last checkpoint.
 */
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

	page = get_meta_page(sbi, start++);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
						SUM_JOURNAL_SIZE);
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		/* SSR logs record a full segment's worth of entries. */
		if (seg_i->alloc_type == SSR)
			blk_off = sbi->blocks_per_seg;

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;
			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
			/* Stay on this page while another entry still fits
			 * before the footer area. */
			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
						SUM_FOOTER_SIZE)
				continue;

			f2fs_put_page(page, 1);
			page = NULL;

			/* Entries continue on the next meta page. */
			page = get_meta_page(sbi, start++);
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
	return 0;
}
1545
/*
 * Load one curseg's full summary block for log @type from its location
 * recorded at the last checkpoint.  For node logs without saved
 * summaries, the entries are rebuilt from the segment's node blocks.
 */
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

	new = get_meta_page(sbi, blk_addr);
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
		if (__exist_node_summaries(sbi)) {
			/* Node summaries carry no version/offset: clear them. */
			struct f2fs_summary *ns = &sum->entries[0];
			int i;
			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
			int err;

			/* No saved summaries: rebuild from node blocks. */
			err = restore_node_summary(sbi, segno, sum);
			if (err) {
				f2fs_put_page(new, 1);
				return err;
			}
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);
	memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
	f2fs_put_page(new, 1);
	return 0;
}
1611
/*
 * Restore all current segments' summaries after mount, choosing the
 * compacted or normal on-disk layout based on the checkpoint flags.
 */
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
	int type = CURSEG_HOT_DATA;
	int err;

	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
		int npages = npages_for_summary_flush(sbi, true);

		/* Readahead the compacted summary blocks in one go. */
		if (npages >= 2)
			ra_meta_pages(sbi, start_sum_block(sbi), npages,
							META_CP, true);

		/* restore for compacted data summary */
		if (read_compacted_summaries(sbi))
			return -EINVAL;
		/* Data logs are done; only node logs remain below. */
		type = CURSEG_HOT_NODE;
	}

	if (__exist_node_summaries(sbi))
		ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
					NR_CURSEG_TYPE - type, META_CP, true);

	for (; type <= CURSEG_COLD_NODE; type++) {
		err = read_normal_summaries(sbi, type);
		if (err)
			return err;
	}

	return 0;
}
1642
/*
 * Pack the NAT/SIT journals and all data-log summary entries into as
 * few meta blocks as possible, starting at @blkaddr.  Mirrors the
 * layout consumed by read_compacted_summaries().
 */
static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *page;
	unsigned char *kaddr;
	struct f2fs_summary *summary;
	struct curseg_info *seg_i;
	int written_size = 0;
	int i, j;

	page = grab_meta_page(sbi, blkaddr++);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: write nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 2: write sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
						SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 3: write summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blkoff;
		seg_i = CURSEG_I(sbi, i);
		/* SSR logs must journal a full segment of entries. */
		if (sbi->ckpt->alloc_type[i] == SSR)
			blkoff = sbi->blocks_per_seg;
		else
			blkoff = curseg_blkoff(sbi, i);

		for (j = 0; j < blkoff; j++) {
			if (!page) {
				page = grab_meta_page(sbi, blkaddr++);
				kaddr = (unsigned char *)page_address(page);
				written_size = 0;
			}
			summary = (struct f2fs_summary *)(kaddr + written_size);
			*summary = seg_i->sum_blk->entries[j];
			written_size += SUMMARY_SIZE;

			/* Keep filling while another entry fits before the
			 * footer area of this block. */
			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
							SUM_FOOTER_SIZE)
				continue;

			set_page_dirty(page);
			f2fs_put_page(page, 1);
			page = NULL;
		}
	}
	/* Flush the final, partially filled page. */
	if (page) {
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}
1699
1700 static void write_normal_summaries(struct f2fs_sb_info *sbi,
1701                                         block_t blkaddr, int type)
1702 {
1703         int i, end;
1704         if (IS_DATASEG(type))
1705                 end = type + NR_CURSEG_DATA_TYPE;
1706         else
1707                 end = type + NR_CURSEG_NODE_TYPE;
1708
1709         for (i = type; i < end; i++) {
1710                 struct curseg_info *sum = CURSEG_I(sbi, i);
1711                 mutex_lock(&sum->curseg_mutex);
1712                 write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1713                 mutex_unlock(&sum->curseg_mutex);
1714         }
1715 }
1716
1717 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1718 {
1719         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1720                 write_compacted_summaries(sbi, start_blk);
1721         else
1722                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1723 }
1724
/*
 * Write the node-segment summaries for a checkpoint, starting at
 * @start_blk.  Node summaries are always written in the normal (one
 * block per curseg) layout; compaction applies only to data summaries
 * (cf. write_data_summaries()).
 */
void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
        write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}
1729
1730 int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1731                                         unsigned int val, int alloc)
1732 {
1733         int i;
1734
1735         if (type == NAT_JOURNAL) {
1736                 for (i = 0; i < nats_in_cursum(sum); i++) {
1737                         if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1738                                 return i;
1739                 }
1740                 if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL))
1741                         return update_nats_in_cursum(sum, 1);
1742         } else if (type == SIT_JOURNAL) {
1743                 for (i = 0; i < sits_in_cursum(sum); i++)
1744                         if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1745                                 return i;
1746                 if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL))
1747                         return update_sits_in_cursum(sum, 1);
1748         }
1749         return -1;
1750 }
1751
/*
 * Read the meta page holding the currently valid on-disk SIT block for
 * @segno (current_sit_addr() selects which of the two SIT copies is
 * live).  Returns a locked page; caller must f2fs_put_page() it.
 */
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
                                        unsigned int segno)
{
        return get_meta_page(sbi, current_sit_addr(sbi, segno));
}
1757
/*
 * Prepare the alternate location of the SIT block covering segment
 * @start for this checkpoint: copy the current block's contents to the
 * next address, mark the copy dirty, and flip the SIT bitmap so future
 * lookups resolve to the new location.  Returns the locked destination
 * page; caller must f2fs_put_page() it after updating entries.
 */
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
                                        unsigned int start)
{
        struct sit_info *sit_i = SIT_I(sbi);
        struct page *src_page, *dst_page;
        pgoff_t src_off, dst_off;
        void *src_addr, *dst_addr;

        src_off = current_sit_addr(sbi, start);
        dst_off = next_sit_addr(sbi, src_off);

        /* get current sit block page without lock */
        src_page = get_meta_page(sbi, src_off);
        dst_page = grab_meta_page(sbi, dst_off);
        /* the live copy must already be stable on disk */
        f2fs_bug_on(sbi, PageDirty(src_page));

        src_addr = page_address(src_page);
        dst_addr = page_address(dst_page);
        memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);

        set_page_dirty(dst_page);
        f2fs_put_page(src_page, 1);

        /* switch the valid-copy bitmap to point at dst_off's side */
        set_to_next_sit(sit_i, start);

        return dst_page;
}
1785
1786 static struct sit_entry_set *grab_sit_entry_set(void)
1787 {
1788         struct sit_entry_set *ses =
1789                         f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
1790
1791         ses->entry_cnt = 0;
1792         INIT_LIST_HEAD(&ses->set_list);
1793         return ses;
1794 }
1795
/* Unlink @ses from whatever set list holds it and free it to the slab. */
static void release_sit_entry_set(struct sit_entry_set *ses)
{
        list_del(&ses->set_list);
        kmem_cache_free(sit_entry_set_slab, ses);
}
1801
/*
 * Re-position @ses after its entry_cnt was incremented so the list
 * stays ordered by ascending entry_cnt: slide it toward the tail past
 * every following set with a smaller count.
 */
static void adjust_sit_entry_set(struct sit_entry_set *ses,
                                                struct list_head *head)
{
        struct sit_entry_set *next = ses;

        /* already at the tail: nothing to pass */
        if (list_is_last(&ses->set_list, head))
                return;

        /*
         * Find the first following set whose count is >= ours; if none
         * exists the cursor ends on the list head and we move to tail.
         */
        list_for_each_entry_continue(next, head, set_list)
                if (ses->entry_cnt <= next->entry_cnt)
                        break;

        /* insert @ses immediately before the stopping position */
        list_move_tail(&ses->set_list, &next->set_list);
}
1816
1817 static void add_sit_entry(unsigned int segno, struct list_head *head)
1818 {
1819         struct sit_entry_set *ses;
1820         unsigned int start_segno = START_SEGNO(segno);
1821
1822         list_for_each_entry(ses, head, set_list) {
1823                 if (ses->start_segno == start_segno) {
1824                         ses->entry_cnt++;
1825                         adjust_sit_entry_set(ses, head);
1826                         return;
1827                 }
1828         }
1829
1830         ses = grab_sit_entry_set();
1831
1832         ses->start_segno = start_segno;
1833         ses->entry_cnt++;
1834         list_add(&ses->set_list, head);
1835 }
1836
1837 static void add_sits_in_set(struct f2fs_sb_info *sbi)
1838 {
1839         struct f2fs_sm_info *sm_info = SM_I(sbi);
1840         struct list_head *set_list = &sm_info->sit_entry_set;
1841         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1842         unsigned int segno;
1843
1844         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1845                 add_sit_entry(segno, set_list);
1846 }
1847
/*
 * Drain the SIT journal kept in the cold-data summary block: mark every
 * journalled segment dirty in the SIT bitmap so it will be flushed via
 * SIT pages instead, then reset the journal count to zero.  Called when
 * the journal cannot hold all dirty entries of this checkpoint.
 */
static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
{
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
        struct f2fs_summary_block *sum = curseg->sum_blk;
        int i;

        for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
                unsigned int segno;
                bool dirtied;

                segno = le32_to_cpu(segno_in_journal(sum, i));
                dirtied = __mark_sit_entry_dirty(sbi, segno);

                /*
                 * NOTE(review): segments already dirty were accounted by
                 * add_sits_in_set(); this adds set accounting only for the
                 * rest — confirm __mark_sit_entry_dirty()'s return meaning.
                 */
                if (!dirtied)
                        add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
        }
        /* empty the journal in one step */
        update_sits_in_cursum(sum, -sits_in_cursum(sum));
}
1866
/*
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 *
 * Dirty entries are flushed per SIT-block set: into the journal inside
 * the cold-data summary block while it has room, and into copied-out
 * SIT pages once it does not.  Discard candidates are collected along
 * the way.  Runs under curseg_mutex and sentry_lock.
 */
void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
        struct sit_info *sit_i = SIT_I(sbi);
        unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
        struct f2fs_summary_block *sum = curseg->sum_blk;
        struct sit_entry_set *ses, *tmp;
        struct list_head *head = &SM_I(sbi)->sit_entry_set;
        bool to_journal = true;
        struct seg_entry *se;

        mutex_lock(&curseg->curseg_mutex);
        mutex_lock(&sit_i->sentry_lock);

        /* nothing dirty: skip straight to the CP_DISCARD handling */
        if (!sit_i->dirty_sentries)
                goto out;

        /*
         * add and account sit entries of dirty bitmap in sit entry
         * set temporarily
         */
        add_sits_in_set(sbi);

        /*
         * if there are no enough space in journal to store dirty sit
         * entries, remove all entries from journal and add and account
         * them in sit entry set.
         */
        if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
                remove_sits_in_journal(sbi);

        /*
         * there are two steps to flush sit entries:
         * #1, flush sit entries to journal in current cold data summary block.
         * #2, flush sit entries to sit page.
         */
        list_for_each_entry_safe(ses, tmp, head, set_list) {
                struct page *page = NULL;
                struct f2fs_sit_block *raw_sit = NULL;
                unsigned int start_segno = ses->start_segno;
                unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
                                                (unsigned long)MAIN_SEGS(sbi));
                unsigned int segno = start_segno;

                /* once the journal runs out of room, never go back to it */
                if (to_journal &&
                        !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
                        to_journal = false;

                if (!to_journal) {
                        /* copy-on-write the SIT block for this set */
                        page = get_next_sit_page(sbi, start_segno);
                        raw_sit = page_address(page);
                }

                /* flush dirty sit entries in region of current sit set */
                for_each_set_bit_from(segno, bitmap, end) {
                        int offset, sit_offset;

                        se = get_seg_entry(sbi, segno);

                        /* add discard candidates */
                        if (cpc->reason != CP_DISCARD) {
                                cpc->trim_start = segno;
                                add_discard_addrs(sbi, cpc);
                        }

                        if (to_journal) {
                                /* alloc=1: grow the journal when absent */
                                offset = lookup_journal_in_cursum(sum,
                                                        SIT_JOURNAL, segno, 1);
                                f2fs_bug_on(sbi, offset < 0);
                                segno_in_journal(sum, offset) =
                                                        cpu_to_le32(segno);
                                seg_info_to_raw_sit(se,
                                                &sit_in_journal(sum, offset));
                        } else {
                                sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
                                seg_info_to_raw_sit(se,
                                                &raw_sit->entries[sit_offset]);
                        }

                        /* entry flushed: clear dirty state and set count */
                        __clear_bit(segno, bitmap);
                        sit_i->dirty_sentries--;
                        ses->entry_cnt--;
                }

                if (!to_journal)
                        f2fs_put_page(page, 1);

                /* the set must be fully drained at this point */
                f2fs_bug_on(sbi, ses->entry_cnt);
                release_sit_entry_set(ses);
        }

        f2fs_bug_on(sbi, !list_empty(head));
        f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
        /* for an explicit trim, issue discards over the requested range */
        if (cpc->reason == CP_DISCARD) {
                for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
                        add_discard_addrs(sbi, cpc);
        }
        mutex_unlock(&sit_i->sentry_lock);
        mutex_unlock(&curseg->curseg_mutex);

        set_prefree_as_free_segments(sbi);
}
1974
/*
 * Allocate and initialize the in-memory SIT (segment information table)
 * state: per-segment entries with their validity/checkpoint/discard
 * bitmaps, optional per-section entries, and a private copy of the
 * checkpoint's SIT bitmap.  On -ENOMEM, partially built state is left
 * attached to SM_I(sbi) — presumably reclaimed by destroy_sit_info()
 * on the caller's error path (TODO confirm).
 */
static int build_sit_info(struct f2fs_sb_info *sbi)
{
        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        struct sit_info *sit_i;
        unsigned int sit_segs, start;
        char *src_bitmap, *dst_bitmap;
        unsigned int bitmap_size;

        /* allocate memory for SIT information */
        sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
        if (!sit_i)
                return -ENOMEM;

        /* attach early so cleanup code can find partial allocations */
        SM_I(sbi)->sit_info = sit_i;

        sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) *
                                        sizeof(struct seg_entry), GFP_KERNEL);
        if (!sit_i->sentries)
                return -ENOMEM;

        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
        sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
        if (!sit_i->dirty_sentries_bitmap)
                return -ENOMEM;

        /* three per-segment block bitmaps: current, checkpointed, discard */
        for (start = 0; start < MAIN_SEGS(sbi); start++) {
                sit_i->sentries[start].cur_valid_map
                        = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
                sit_i->sentries[start].ckpt_valid_map
                        = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
                sit_i->sentries[start].discard_map
                        = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
                if (!sit_i->sentries[start].cur_valid_map ||
                                !sit_i->sentries[start].ckpt_valid_map ||
                                !sit_i->sentries[start].discard_map)
                        return -ENOMEM;
        }

        /* scratch bitmap for temporary computations */
        sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
        if (!sit_i->tmp_map)
                return -ENOMEM;

        /* per-section accounting only matters with multi-segment sections */
        if (sbi->segs_per_sec > 1) {
                sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) *
                                        sizeof(struct sec_entry), GFP_KERNEL);
                if (!sit_i->sec_entries)
                        return -ENOMEM;
        }

        /* get information related with SIT */
        sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

        /* setup SIT bitmap from ckeckpoint pack */
        bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
        src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

        /* private, writable copy of the checkpoint's SIT bitmap */
        dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
        if (!dst_bitmap)
                return -ENOMEM;

        /* init SIT information */
        sit_i->s_ops = &default_salloc_ops;

        sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
        sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
        sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
        sit_i->sit_bitmap = dst_bitmap;
        sit_i->bitmap_size = bitmap_size;
        sit_i->dirty_sentries = 0;
        sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
        sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
        sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
        mutex_init(&sit_i->sentry_lock);
        return 0;
}
2051
/*
 * Allocate the free-segment and free-section bitmaps.  Both start out
 * all-ones (everything "in use"); init_free_segmap() later clears the
 * bits of genuinely free segments based on SIT data.  On -ENOMEM the
 * partially built state stays on SM_I(sbi) for destroy_free_segmap().
 */
static int build_free_segmap(struct f2fs_sb_info *sbi)
{
        struct free_segmap_info *free_i;
        unsigned int bitmap_size, sec_bitmap_size;

        /* allocate memory for free segmap information */
        free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
        if (!free_i)
                return -ENOMEM;

        SM_I(sbi)->free_info = free_i;

        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
        free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL);
        if (!free_i->free_segmap)
                return -ENOMEM;

        sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
        free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL);
        if (!free_i->free_secmap)
                return -ENOMEM;

        /* set all segments as dirty temporarily */
        memset(free_i->free_segmap, 0xff, bitmap_size);
        memset(free_i->free_secmap, 0xff, sec_bitmap_size);

        /* init free segmap information */
        free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
        free_i->free_segments = 0;
        free_i->free_sections = 0;
        spin_lock_init(&free_i->segmap_lock);
        return 0;
}
2085
/*
 * Allocate the array of current-segment descriptors (one per curseg
 * type) with a page-sized summary block each, then restore their
 * contents from the checkpoint via restore_curseg_summaries().
 * Returns 0 or -ENOMEM / the restore error.
 */
static int build_curseg(struct f2fs_sb_info *sbi)
{
        struct curseg_info *array;
        int i;

        array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
        if (!array)
                return -ENOMEM;

        SM_I(sbi)->curseg_array = array;

        for (i = 0; i < NR_CURSEG_TYPE; i++) {
                mutex_init(&array[i].curseg_mutex);
                /* one full page per in-memory summary block */
                array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
                if (!array[i].sum_blk)
                        return -ENOMEM;
                /* no segment selected yet; restore below fills these in */
                array[i].segno = NULL_SEGNO;
                array[i].next_blkoff = 0;
        }
        return restore_curseg_summaries(sbi);
}
2107
/*
 * Populate the in-memory seg_entry array from on-disk SIT blocks (read
 * with readahead), letting entries still sitting in the SIT journal of
 * the cold-data summary override the on-disk copy.  Also builds the
 * initial discard maps and per-section valid-block counts.
 */
static void build_sit_entries(struct f2fs_sb_info *sbi)
{
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
        struct f2fs_summary_block *sum = curseg->sum_blk;
        int sit_blk_cnt = SIT_BLK_CNT(sbi);
        unsigned int i, start, end;
        unsigned int readed, start_blk = 0;
        int nrpages = MAX_BIO_BLOCKS(sbi);

        do {
                /* readahead a batch of SIT meta pages */
                readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);

                start = start_blk * sit_i->sents_per_block;
                end = (start_blk + readed) * sit_i->sents_per_block;

                for (; start < end && start < MAIN_SEGS(sbi); start++) {
                        struct seg_entry *se = &sit_i->sentries[start];
                        struct f2fs_sit_block *sit_blk;
                        struct f2fs_sit_entry sit;
                        struct page *page;

                        /* journal takes precedence over the SIT block */
                        mutex_lock(&curseg->curseg_mutex);
                        for (i = 0; i < sits_in_cursum(sum); i++) {
                                if (le32_to_cpu(segno_in_journal(sum, i))
                                                                == start) {
                                        sit = sit_in_journal(sum, i);
                                        mutex_unlock(&curseg->curseg_mutex);
                                        goto got_it;
                                }
                        }
                        mutex_unlock(&curseg->curseg_mutex);

                        /* not journalled: take the entry from its SIT page */
                        page = get_current_sit_page(sbi, start);
                        sit_blk = (struct f2fs_sit_block *)page_address(page);
                        sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
                        f2fs_put_page(page, 1);
got_it:
                        check_block_count(sbi, start, &sit);
                        seg_info_from_raw_sit(se, &sit);

                        /* build discard map only one time */
                        memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
                        sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks;

                        /* accumulate section counts for multi-seg sections */
                        if (sbi->segs_per_sec > 1) {
                                struct sec_entry *e = get_sec_entry(sbi, start);
                                e->valid_blocks += se->valid_blocks;
                        }
                }
                start_blk += readed;
        } while (start_blk < sit_blk_cnt);
}
2161
2162 static void init_free_segmap(struct f2fs_sb_info *sbi)
2163 {
2164         unsigned int start;
2165         int type;
2166
2167         for (start = 0; start < MAIN_SEGS(sbi); start++) {
2168                 struct seg_entry *sentry = get_seg_entry(sbi, start);
2169                 if (!sentry->valid_blocks)
2170                         __set_free(sbi, start);
2171         }
2172
2173         /* set use the current segments */
2174         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
2175                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
2176                 __set_test_and_inuse(sbi, curseg_t->segno);
2177         }
2178 }
2179
2180 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
2181 {
2182         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2183         struct free_segmap_info *free_i = FREE_I(sbi);
2184         unsigned int segno = 0, offset = 0;
2185         unsigned short valid_blocks;
2186
2187         while (1) {
2188                 /* find dirty segment based on free segmap */
2189                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
2190                 if (segno >= MAIN_SEGS(sbi))
2191                         break;
2192                 offset = segno + 1;
2193                 valid_blocks = get_valid_blocks(sbi, segno, 0);
2194                 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2195                         continue;
2196                 if (valid_blocks > sbi->blocks_per_seg) {
2197                         f2fs_bug_on(sbi, 1);
2198                         continue;
2199                 }
2200                 mutex_lock(&dirty_i->seglist_lock);
2201                 __locate_dirty_segment(sbi, segno, DIRTY);
2202                 mutex_unlock(&dirty_i->seglist_lock);
2203         }
2204 }
2205
2206 static int init_victim_secmap(struct f2fs_sb_info *sbi)
2207 {
2208         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2209         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2210
2211         dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
2212         if (!dirty_i->victim_secmap)
2213                 return -ENOMEM;
2214         return 0;
2215 }
2216
2217 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2218 {
2219         struct dirty_seglist_info *dirty_i;
2220         unsigned int bitmap_size, i;
2221
2222         /* allocate memory for dirty segments list information */
2223         dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2224         if (!dirty_i)
2225                 return -ENOMEM;
2226
2227         SM_I(sbi)->dirty_info = dirty_i;
2228         mutex_init(&dirty_i->seglist_lock);
2229
2230         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2231
2232         for (i = 0; i < NR_DIRTY_TYPE; i++) {
2233                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
2234                 if (!dirty_i->dirty_segmap[i])
2235                         return -ENOMEM;
2236         }
2237
2238         init_dirty_segmap(sbi);
2239         return init_victim_secmap(sbi);
2240 }
2241
2242 /*
2243  * Update min, max modified time for cost-benefit GC algorithm
2244  */
2245 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2246 {
2247         struct sit_info *sit_i = SIT_I(sbi);
2248         unsigned int segno;
2249
2250         mutex_lock(&sit_i->sentry_lock);
2251
2252         sit_i->min_mtime = LLONG_MAX;
2253
2254         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2255                 unsigned int i;
2256                 unsigned long long mtime = 0;
2257
2258                 for (i = 0; i < sbi->segs_per_sec; i++)
2259                         mtime += get_seg_entry(sbi, segno + i)->mtime;
2260
2261                 mtime = div_u64(mtime, sbi->segs_per_sec);
2262
2263                 if (sit_i->min_mtime > mtime)
2264                         sit_i->min_mtime = mtime;
2265         }
2266         sit_i->max_mtime = get_mtime(sbi);
2267         mutex_unlock(&sit_i->sentry_lock);
2268 }
2269
/*
 * Allocate and initialize the whole segment manager at mount time:
 * tunables from the superblock/checkpoint, the optional flush-merge
 * worker, then SIT info, free segmap, current segments, SIT entries,
 * dirty segmap, and the GC mtime bounds — in that order, since each
 * stage reads state built by the previous ones.  On error, partially
 * built state stays on sbi->sm_info — presumably torn down by the
 * caller via destroy_segment_manager() (TODO confirm).
 */
int build_segment_manager(struct f2fs_sb_info *sbi)
{
        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        struct f2fs_sm_info *sm_info;
        int err;

        sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
        if (!sm_info)
                return -ENOMEM;

        /* init sm info */
        sbi->sm_info = sm_info;
        sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
        sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
        sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
        sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
        sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
        sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
        sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
        /* default prefree-reclaim threshold as a percentage of main area */
        sm_info->rec_prefree_segments = sm_info->main_segments *
                                        DEF_RECLAIM_PREFREE_SEGMENTS / 100;
        sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
        sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
        sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;

        INIT_LIST_HEAD(&sm_info->discard_list);
        sm_info->nr_discards = 0;
        sm_info->max_discards = 0;

        sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;

        INIT_LIST_HEAD(&sm_info->sit_entry_set);

        /* flush merging only makes sense on a writable mount */
        if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
                err = create_flush_cmd_control(sbi);
                if (err)
                        return err;
        }

        err = build_sit_info(sbi);
        if (err)
                return err;
        err = build_free_segmap(sbi);
        if (err)
                return err;
        err = build_curseg(sbi);
        if (err)
                return err;

        /* reinit free segmap based on SIT */
        build_sit_entries(sbi);

        init_free_segmap(sbi);
        err = build_dirty_segmap(sbi);
        if (err)
                return err;

        init_min_max_mtime(sbi);
        return 0;
}
2331
2332 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2333                 enum dirty_type dirty_type)
2334 {
2335         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2336
2337         mutex_lock(&dirty_i->seglist_lock);
2338         kvfree(dirty_i->dirty_segmap[dirty_type]);
2339         dirty_i->nr_dirty[dirty_type] = 0;
2340         mutex_unlock(&dirty_i->seglist_lock);
2341 }
2342
/* Free the GC victim section bitmap (kvfree handles NULL). */
static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        kvfree(dirty_i->victim_secmap);
}
2348
2349 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2350 {
2351         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2352         int i;
2353
2354         if (!dirty_i)
2355                 return;
2356
2357         /* discard pre-free/dirty segments list */
2358         for (i = 0; i < NR_DIRTY_TYPE; i++)
2359                 discard_dirty_segmap(sbi, i);
2360
2361         destroy_victim_secmap(sbi);
2362         SM_I(sbi)->dirty_info = NULL;
2363         kfree(dirty_i);
2364 }
2365
2366 static void destroy_curseg(struct f2fs_sb_info *sbi)
2367 {
2368         struct curseg_info *array = SM_I(sbi)->curseg_array;
2369         int i;
2370
2371         if (!array)
2372                 return;
2373         SM_I(sbi)->curseg_array = NULL;
2374         for (i = 0; i < NR_CURSEG_TYPE; i++)
2375                 kfree(array[i].sum_blk);
2376         kfree(array);
2377 }
2378
2379 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2380 {
2381         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2382         if (!free_i)
2383                 return;
2384         SM_I(sbi)->free_info = NULL;
2385         kvfree(free_i->free_segmap);
2386         kvfree(free_i->free_secmap);
2387         kfree(free_i);
2388 }
2389
/*
 * Tear down everything build_sit_info() allocated: per-segment bitmaps,
 * the entry arrays, the dirty bitmap, and the private SIT bitmap copy.
 * Safe on partially built state (all frees handle NULL) and when SIT
 * info was never allocated.
 */
static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
        struct sit_info *sit_i = SIT_I(sbi);
        unsigned int start;

        if (!sit_i)
                return;

        if (sit_i->sentries) {
                for (start = 0; start < MAIN_SEGS(sbi); start++) {
                        kfree(sit_i->sentries[start].cur_valid_map);
                        kfree(sit_i->sentries[start].ckpt_valid_map);
                        kfree(sit_i->sentries[start].discard_map);
                }
        }
        kfree(sit_i->tmp_map);

        kvfree(sit_i->sentries);
        kvfree(sit_i->sec_entries);
        kvfree(sit_i->dirty_sentries_bitmap);

        /* detach before the final frees; local sit_i stays valid */
        SM_I(sbi)->sit_info = NULL;
        kfree(sit_i->sit_bitmap);
        kfree(sit_i);
}
2415
/*
 * Full segment-manager teardown at unmount (or mount failure): stop the
 * flush-merge worker, then destroy substructures in roughly the reverse
 * order build_segment_manager() created them.  A no-op if the segment
 * manager was never allocated.
 */
void destroy_segment_manager(struct f2fs_sb_info *sbi)
{
        struct f2fs_sm_info *sm_info = SM_I(sbi);

        if (!sm_info)
                return;
        destroy_flush_cmd_control(sbi);
        destroy_dirty_segmap(sbi);
        destroy_curseg(sbi);
        destroy_free_segmap(sbi);
        destroy_sit_info(sbi);
        sbi->sm_info = NULL;
        kfree(sm_info);
}
2430
2431 int __init create_segment_manager_caches(void)
2432 {
2433         discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2434                         sizeof(struct discard_entry));
2435         if (!discard_entry_slab)
2436                 goto fail;
2437
2438         sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2439                         sizeof(struct sit_entry_set));
2440         if (!sit_entry_set_slab)
2441                 goto destory_discard_entry;
2442
2443         inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2444                         sizeof(struct inmem_pages));
2445         if (!inmem_entry_slab)
2446                 goto destroy_sit_entry_set;
2447         return 0;
2448
2449 destroy_sit_entry_set:
2450         kmem_cache_destroy(sit_entry_set_slab);
2451 destory_discard_entry:
2452         kmem_cache_destroy(discard_entry_slab);
2453 fail:
2454         return -ENOMEM;
2455 }
2456
2457 void destroy_segment_manager_caches(void)
2458 {
2459         kmem_cache_destroy(sit_entry_set_slab);
2460         kmem_cache_destroy(discard_entry_slab);
2461         kmem_cache_destroy(inmem_entry_slab);
2462 }