GNU Linux-libre 4.9.337-gnu1: fs/f2fs/checkpoint.c
1 /*
2  * fs/f2fs/checkpoint.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/bio.h>
13 #include <linux/mpage.h>
14 #include <linux/writeback.h>
15 #include <linux/blkdev.h>
16 #include <linux/f2fs_fs.h>
17 #include <linux/pagevec.h>
18 #include <linux/swap.h>
19
20 #include "f2fs.h"
21 #include "node.h"
22 #include "segment.h"
23 #include "trace.h"
24 #include <trace/events/f2fs.h>
25
26 static struct kmem_cache *ino_entry_slab;
27 struct kmem_cache *inode_entry_slab;
28
29 void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
30 {
31         set_ckpt_flags(sbi, CP_ERROR_FLAG);
32         sbi->sb->s_flags |= MS_RDONLY;
33         if (!end_io)
34                 f2fs_flush_merged_bios(sbi);
35 }
36
37 /*
38  * We guarantee no failure on the returned page.
39  */
40 struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
41 {
42         struct address_space *mapping = META_MAPPING(sbi);
43         struct page *page = NULL;
44 repeat:
45         page = f2fs_grab_cache_page(mapping, index, false);
46         if (!page) {
47                 cond_resched();
48                 goto repeat;
49         }
50         f2fs_wait_on_page_writeback(page, META, true);
51         if (!PageUptodate(page))
52                 SetPageUptodate(page);
53         return page;
54 }
55
56 /*
57  * We guarantee no failure on the returned page.
58  */
59 static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
60                                                         bool is_meta)
61 {
62         struct address_space *mapping = META_MAPPING(sbi);
63         struct page *page;
64         struct f2fs_io_info fio = {
65                 .sbi = sbi,
66                 .type = META,
67                 .op = REQ_OP_READ,
68                 .op_flags = READ_SYNC | REQ_META | REQ_PRIO,
69                 .old_blkaddr = index,
70                 .new_blkaddr = index,
71                 .encrypted_page = NULL,
72                 .is_meta = is_meta,
73         };
74
75         if (unlikely(!is_meta))
76                 fio.op_flags &= ~REQ_META;
77 repeat:
78         page = f2fs_grab_cache_page(mapping, index, false);
79         if (!page) {
80                 cond_resched();
81                 goto repeat;
82         }
83         if (PageUptodate(page))
84                 goto out;
85
86         fio.page = page;
87
88         if (f2fs_submit_page_bio(&fio)) {
89                 memset(page_address(page), 0, PAGE_SIZE);
90                 f2fs_stop_checkpoint(sbi, false);
91                 f2fs_bug_on(sbi, 1);
92                 return page;
93         }
94
95         lock_page(page);
96         if (unlikely(page->mapping != mapping)) {
97                 f2fs_put_page(page, 1);
98                 goto repeat;
99         }
100
101         /*
102          * if there is any IO error when accessing the device, make our
103          * filesystem readonly and make sure we do not write a checkpoint
104          * with a non-uptodate meta page.
105          */
106         if (unlikely(!PageUptodate(page)))
107                 f2fs_stop_checkpoint(sbi, false);
108 out:
109         return page;
110 }
111
112 struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
113 {
114         return __get_meta_page(sbi, index, true);
115 }
116
117 /* for POR only */
118 struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
119 {
120         return __get_meta_page(sbi, index, false);
121 }
122
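/*
 * Check whether @blkaddr lies within the on-disk range that is valid for
 * the given meta area @type (NAT/SIT/SSA/CP/POR or generic data/meta).
 */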
123 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
124                                         block_t blkaddr, int type)
125 {
126         switch (type) {
127         case META_NAT:
128                 break;
129         case META_SIT:
130                 if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
131                         return false;
132                 break;
133         case META_SSA:
134                 if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
135                         blkaddr < SM_I(sbi)->ssa_blkaddr))
136                         return false;
137                 break;
138         case META_CP:
139                 if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
140                         blkaddr < __start_cp_addr(sbi)))
141                         return false;
142                 break;
143         case META_POR:
144         case DATA_GENERIC:
145                 if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
146                         blkaddr < MAIN_BLKADDR(sbi))) {
147                         if (type == DATA_GENERIC) {
148                                 f2fs_msg(sbi->sb, KERN_WARNING,
149                                         "access invalid blkaddr:%u", blkaddr);
150                                 WARN_ON(1);
151                         }
152                         return false;
153                 }
154                 break;
155         case META_GENERIC:
156                 if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
157                         blkaddr >= MAIN_BLKADDR(sbi)))
158                         return false;
159                 break;
160         default:
161                 BUG();
162         }
163
164         return true;
165 }
166
167 /*
168  * Readahead CP/NAT/SIT/SSA pages
169  */
170 int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
171                                                         int type, bool sync)
172 {
173         struct page *page;
174         block_t blkno = start;
175         struct f2fs_io_info fio = {
176                 .sbi = sbi,
177                 .type = META,
178                 .op = REQ_OP_READ,
179                 .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD,
180                 .encrypted_page = NULL,
181                 .is_meta = (type != META_POR),
182         };
183         struct blk_plug plug;
184
185         if (unlikely(type == META_POR))
186                 fio.op_flags &= ~REQ_META;
187
188         blk_start_plug(&plug);
189         for (; nrpages-- > 0; blkno++) {
190
191                 if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
192                         goto out;
193
194                 switch (type) {
195                 case META_NAT:
196                         if (unlikely(blkno >=
197                                         NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
198                                 blkno = 0;
199                         /* get nat block addr */
200                         fio.new_blkaddr = current_nat_addr(sbi,
201                                         blkno * NAT_ENTRY_PER_BLOCK);
202                         break;
203                 case META_SIT:
204                         if (unlikely(blkno >= TOTAL_SEGS(sbi)))
205                                 goto out;
206                         /* get sit block addr */
207                         fio.new_blkaddr = current_sit_addr(sbi,
208                                         blkno * SIT_ENTRY_PER_BLOCK);
209                         break;
210                 case META_SSA:
211                 case META_CP:
212                 case META_POR:
213                         fio.new_blkaddr = blkno;
214                         break;
215                 default:
216                         BUG();
217                 }
218
219                 page = f2fs_grab_cache_page(META_MAPPING(sbi),
220                                                 fio.new_blkaddr, false);
221                 if (!page)
222                         continue;
223                 if (PageUptodate(page)) {
224                         f2fs_put_page(page, 1);
225                         continue;
226                 }
227
228                 fio.page = page;
229                 fio.old_blkaddr = fio.new_blkaddr;
230                 f2fs_submit_page_mbio(&fio);
231                 f2fs_put_page(page, 0);
232         }
233 out:
234         f2fs_submit_merged_bio(sbi, META, READ);
235         blk_finish_plug(&plug);
236         return blkno - start;
237 }
238
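/*
 * Readahead a batch of recovery (POR) meta pages, but only when the page
 * at @index is not already cached and uptodate.
 */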
239 void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
240 {
241         struct page *page;
242         bool readahead = false;
243
244         page = find_get_page(META_MAPPING(sbi), index);
245         if (!page || !PageUptodate(page))
246                 readahead = true;
247         f2fs_put_page(page, 0);
248
249         if (readahead)
250                 ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
251 }
252
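/* writepage callback for meta: redirty the page when it cannot be written safely */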
253 static int f2fs_write_meta_page(struct page *page,
254                                 struct writeback_control *wbc)
255 {
256         struct f2fs_sb_info *sbi = F2FS_P_SB(page);
257
258         trace_f2fs_writepage(page, META);
259
260         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
261                 goto redirty_out;
262         if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
263                 goto redirty_out;
264         if (unlikely(f2fs_cp_error(sbi)))
265                 goto redirty_out;
266
267         write_meta_page(sbi, page);
268         dec_page_count(sbi, F2FS_DIRTY_META);
269
270         if (wbc->for_reclaim)
271                 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
272
273         unlock_page(page);
274
275         if (unlikely(f2fs_cp_error(sbi)))
276                 f2fs_submit_merged_bio(sbi, META, WRITE);
277
278         return 0;
279
280 redirty_out:
281         redirty_page_for_writepage(wbc, page);
282         return AOP_WRITEPAGE_ACTIVATE;
283 }
284
285 static int f2fs_write_meta_pages(struct address_space *mapping,
286                                 struct writeback_control *wbc)
287 {
288         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
289         long diff, written;
290
291         /* collect a number of dirty meta pages and write them together */
292         if (wbc->for_kupdate ||
293                 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
294                 goto skip_write;
295
296         trace_f2fs_writepages(mapping->host, wbc, META);
297
298         /* if mounting failed, skip writing meta pages */
299         mutex_lock(&sbi->cp_mutex);
300         diff = nr_pages_to_write(sbi, META, wbc);
301         written = sync_meta_pages(sbi, META, wbc->nr_to_write);
302         mutex_unlock(&sbi->cp_mutex);
303         wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
304         return 0;
305
306 skip_write:
307         wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
308         trace_f2fs_writepages(mapping->host, wbc, META);
309         return 0;
310 }
311
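/*
 * Write back up to @nr_to_write dirty meta pages; with a bounded count the
 * walk stops at the first non-contiguous index, and the merged bio is
 * submitted once the loop finishes.
 */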
312 long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
313                                                 long nr_to_write)
314 {
315         struct address_space *mapping = META_MAPPING(sbi);
316         pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
317         struct pagevec pvec;
318         long nwritten = 0;
319         struct writeback_control wbc = {
320                 .for_reclaim = 0,
321         };
322         struct blk_plug plug;
323
324         pagevec_init(&pvec, 0);
325
326         blk_start_plug(&plug);
327
328         while (index <= end) {
329                 int i, nr_pages;
330                 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
331                                 PAGECACHE_TAG_DIRTY,
332                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
333                 if (unlikely(nr_pages == 0))
334                         break;
335
336                 for (i = 0; i < nr_pages; i++) {
337                         struct page *page = pvec.pages[i];
338
339                         if (prev == ULONG_MAX)
340                                 prev = page->index - 1;
341                         if (nr_to_write != LONG_MAX && page->index != prev + 1) {
342                                 pagevec_release(&pvec);
343                                 goto stop;
344                         }
345
346                         lock_page(page);
347
348                         if (unlikely(page->mapping != mapping)) {
349 continue_unlock:
350                                 unlock_page(page);
351                                 continue;
352                         }
353                         if (!PageDirty(page)) {
354                                 /* someone wrote it for us */
355                                 goto continue_unlock;
356                         }
357
358                         f2fs_wait_on_page_writeback(page, META, true);
359
360                         BUG_ON(PageWriteback(page));
361                         if (!clear_page_dirty_for_io(page))
362                                 goto continue_unlock;
363
364                         if (mapping->a_ops->writepage(page, &wbc)) {
365                                 unlock_page(page);
366                                 break;
367                         }
368                         nwritten++;
369                         prev = page->index;
370                         if (unlikely(nwritten >= nr_to_write))
371                                 break;
372                 }
373                 pagevec_release(&pvec);
374                 cond_resched();
375         }
376 stop:
377         if (nwritten)
378                 f2fs_submit_merged_bio(sbi, type, WRITE);
379
380         blk_finish_plug(&plug);
381
382         return nwritten;
383 }
384
385 static int f2fs_set_meta_page_dirty(struct page *page)
386 {
387         trace_f2fs_set_page_dirty(page, META);
388
389         if (!PageUptodate(page))
390                 SetPageUptodate(page);
391         if (!PageDirty(page)) {
392                 f2fs_set_page_dirty_nobuffers(page);
393                 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
394                 SetPagePrivate(page);
395                 f2fs_trace_pid(page);
396                 return 1;
397         }
398         return 0;
399 }
400
401 const struct address_space_operations f2fs_meta_aops = {
402         .writepage      = f2fs_write_meta_page,
403         .writepages     = f2fs_write_meta_pages,
404         .set_page_dirty = f2fs_set_meta_page_dirty,
405         .invalidatepage = f2fs_invalidate_page,
406         .releasepage    = f2fs_release_page,
407 #ifdef CONFIG_MIGRATION
408         .migratepage    = f2fs_migrate_page,
409 #endif
410 };
411
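/* insert @ino of the given @type into the per-type radix tree and list */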
412 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
413 {
414         struct inode_management *im = &sbi->im[type];
415         struct ino_entry *e, *tmp;
416
417         tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
418 retry:
419         radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
420
421         spin_lock(&im->ino_lock);
422         e = radix_tree_lookup(&im->ino_root, ino);
423         if (!e) {
424                 e = tmp;
425                 if (radix_tree_insert(&im->ino_root, ino, e)) {
426                         spin_unlock(&im->ino_lock);
427                         radix_tree_preload_end();
428                         goto retry;
429                 }
430                 memset(e, 0, sizeof(struct ino_entry));
431                 e->ino = ino;
432
433                 list_add_tail(&e->list, &im->ino_list);
434                 if (type != ORPHAN_INO)
435                         im->ino_num++;
436         }
437         spin_unlock(&im->ino_lock);
438         radix_tree_preload_end();
439
440         if (e != tmp)
441                 kmem_cache_free(ino_entry_slab, tmp);
442 }
443
444 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
445 {
446         struct inode_management *im = &sbi->im[type];
447         struct ino_entry *e;
448
449         spin_lock(&im->ino_lock);
450         e = radix_tree_lookup(&im->ino_root, ino);
451         if (e) {
452                 list_del(&e->list);
453                 radix_tree_delete(&im->ino_root, ino);
454                 im->ino_num--;
455                 spin_unlock(&im->ino_lock);
456                 kmem_cache_free(ino_entry_slab, e);
457                 return;
458         }
459         spin_unlock(&im->ino_lock);
460 }
461
462 void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
463 {
464         /* add new dirty ino entry into list */
465         __add_ino_entry(sbi, ino, type);
466 }
467
468 void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
469 {
470         /* remove dirty ino entry from list */
471         __remove_ino_entry(sbi, ino, type);
472 }
473
474 /* mode should be APPEND_INO or UPDATE_INO */
475 bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
476 {
477         struct inode_management *im = &sbi->im[mode];
478         struct ino_entry *e;
479
480         spin_lock(&im->ino_lock);
481         e = radix_tree_lookup(&im->ino_root, ino);
482         spin_unlock(&im->ino_lock);
483         return e ? true : false;
484 }
485
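/* free every cached ino entry; @all additionally drops ORPHAN_INO entries */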
486 void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
487 {
488         struct ino_entry *e, *tmp;
489         int i;
490
491         for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) {
492                 struct inode_management *im = &sbi->im[i];
493
494                 spin_lock(&im->ino_lock);
495                 list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
496                         list_del(&e->list);
497                         radix_tree_delete(&im->ino_root, e->ino);
498                         kmem_cache_free(ino_entry_slab, e);
499                         im->ino_num--;
500                 }
501                 spin_unlock(&im->ino_lock);
502         }
503 }
504
505 int acquire_orphan_inode(struct f2fs_sb_info *sbi)
506 {
507         struct inode_management *im = &sbi->im[ORPHAN_INO];
508         int err = 0;
509
510         spin_lock(&im->ino_lock);
511
512 #ifdef CONFIG_F2FS_FAULT_INJECTION
513         if (time_to_inject(sbi, FAULT_ORPHAN)) {
514                 spin_unlock(&im->ino_lock);
515                 return -ENOSPC;
516         }
517 #endif
518         if (unlikely(im->ino_num >= sbi->max_orphans))
519                 err = -ENOSPC;
520         else
521                 im->ino_num++;
522         spin_unlock(&im->ino_lock);
523
524         return err;
525 }
526
527 void release_orphan_inode(struct f2fs_sb_info *sbi)
528 {
529         struct inode_management *im = &sbi->im[ORPHAN_INO];
530
531         spin_lock(&im->ino_lock);
532         f2fs_bug_on(sbi, im->ino_num == 0);
533         im->ino_num--;
534         spin_unlock(&im->ino_lock);
535 }
536
537 void add_orphan_inode(struct inode *inode)
538 {
539         /* add new orphan ino entry into list */
540         __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
541         update_inode_page(inode);
542 }
543
544 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
545 {
546         /* remove orphan entry from orphan list */
547         __remove_ino_entry(sbi, ino, ORPHAN_INO);
548 }
549
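/*
 * Reclaim one orphan inode found in the checkpoint: clear its nlink and
 * let the final iput() truncate and free it.
 */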
550 static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
551 {
552         struct inode *inode;
553         struct node_info ni;
554         int err = acquire_orphan_inode(sbi);
555
556         if (err) {
557                 set_sbi_flag(sbi, SBI_NEED_FSCK);
558                 f2fs_msg(sbi->sb, KERN_WARNING,
559                                 "%s: orphan failed (ino=%x), run fsck to fix.",
560                                 __func__, ino);
561                 return err;
562         }
563
564         __add_ino_entry(sbi, ino, ORPHAN_INO);
565
566         inode = f2fs_iget_retry(sbi->sb, ino);
567         if (IS_ERR(inode)) {
568                 /*
569                  * it is a bug if we cannot find the inode that this
570                  * orphan entry refers to.
571                  */
572                 f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
573                 return PTR_ERR(inode);
574         }
575
576         clear_nlink(inode);
577
578         /* truncate all the data during iput */
579         iput(inode);
580
581         get_node_info(sbi, ino, &ni);
582
583         /* ENOMEM was fully retried in f2fs_evict_inode. */
584         if (ni.blk_addr != NULL_ADDR) {
585                 set_sbi_flag(sbi, SBI_NEED_FSCK);
586                 f2fs_msg(sbi->sb, KERN_WARNING,
587                         "%s: orphan failed (ino=%x), run fsck to fix.",
588                                 __func__, ino);
589                 return -EIO;
590         }
591         __remove_ino_entry(sbi, ino, ORPHAN_INO);
592         return 0;
593 }
594
595 int recover_orphan_inodes(struct f2fs_sb_info *sbi)
596 {
597         block_t start_blk, orphan_blocks, i, j;
598         int err;
599
600         if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
601                 return 0;
602
603         start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
604         orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
605
606         ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
607
608         for (i = 0; i < orphan_blocks; i++) {
609                 struct page *page = get_meta_page(sbi, start_blk + i);
610                 struct f2fs_orphan_block *orphan_blk;
611
612                 orphan_blk = (struct f2fs_orphan_block *)page_address(page);
613                 for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
614                         nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
615                         err = recover_orphan_inode(sbi, ino);
616                         if (err) {
617                                 f2fs_put_page(page, 1);
618                                 return err;
619                         }
620                 }
621                 f2fs_put_page(page, 1);
622         }
623         /* clear Orphan Flag */
624         clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
625         return 0;
626 }
627
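/*
 * Write the in-memory orphan inode list into consecutive orphan blocks of
 * the checkpoint pack, starting at @start_blk.
 */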
628 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
629 {
630         struct list_head *head;
631         struct f2fs_orphan_block *orphan_blk = NULL;
632         unsigned int nentries = 0;
633         unsigned short index = 1;
634         unsigned short orphan_blocks;
635         struct page *page = NULL;
636         struct ino_entry *orphan = NULL;
637         struct inode_management *im = &sbi->im[ORPHAN_INO];
638
639         orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
640
641         /*
642          * we don't need to do spin_lock(&im->ino_lock) here, since all the
643          * orphan inode operations are covered under f2fs_lock_op().
644          * And, spin_lock should be avoided due to page operations below.
645          */
646         head = &im->ino_list;
647
648         /* loop for each orphan inode entry and write them into orphan blocks */
649         list_for_each_entry(orphan, head, list) {
650                 if (!page) {
651                         page = grab_meta_page(sbi, start_blk++);
652                         orphan_blk =
653                                 (struct f2fs_orphan_block *)page_address(page);
654                         memset(orphan_blk, 0, sizeof(*orphan_blk));
655                 }
656
657                 orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
658
659                 if (nentries == F2FS_ORPHANS_PER_BLOCK) {
660                         /*
661                          * an orphan block holds up to 1020 entries; once
662                          * it is full, flush the current orphan block and
663                          * grab another one
664                          */
665                         orphan_blk->blk_addr = cpu_to_le16(index);
666                         orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
667                         orphan_blk->entry_count = cpu_to_le32(nentries);
668                         set_page_dirty(page);
669                         f2fs_put_page(page, 1);
670                         index++;
671                         nentries = 0;
672                         page = NULL;
673                 }
674         }
675
676         if (page) {
677                 orphan_blk->blk_addr = cpu_to_le16(index);
678                 orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
679                 orphan_blk->entry_count = cpu_to_le32(nentries);
680                 set_page_dirty(page);
681                 f2fs_put_page(page, 1);
682         }
683 }
684
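/*
 * Read the checkpoint block at @cp_addr, verify its CRC and return its
 * version; on success the caller holds a reference on *cp_page.
 */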
685 static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
686                 struct f2fs_checkpoint **cp_block, struct page **cp_page,
687                 unsigned long long *version)
688 {
689         unsigned long blk_size = sbi->blocksize;
690         size_t crc_offset = 0;
691         __u32 crc = 0;
692
693         *cp_page = get_meta_page(sbi, cp_addr);
694         *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
695
696         crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
697         if (crc_offset >= blk_size) {
698                 f2fs_put_page(*cp_page, 1);
699                 f2fs_msg(sbi->sb, KERN_WARNING,
700                         "invalid crc_offset: %zu", crc_offset);
701                 return -EINVAL;
702         }
703
704         crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
705                                                         + crc_offset)));
706         if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
707                 f2fs_put_page(*cp_page, 1);
708                 f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
709                 return -EINVAL;
710         }
711
712         *version = cur_cp_version(*cp_block);
713         return 0;
714 }
715
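/*
 * A checkpoint pack is considered valid only when the versions stored in
 * its first and last blocks match; return the first block's page then.
 */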
716 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
717                                 block_t cp_addr, unsigned long long *version)
718 {
719         struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
720         struct f2fs_checkpoint *cp_block = NULL;
721         unsigned long long cur_version = 0, pre_version = 0;
722         int err;
723
724         err = get_checkpoint_version(sbi, cp_addr, &cp_block,
725                                         &cp_page_1, version);
726         if (err)
727                 return NULL;
728
729         if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
730                                         sbi->blocks_per_seg) {
731                 f2fs_msg(sbi->sb, KERN_WARNING,
732                         "invalid cp_pack_total_block_count:%u",
733                         le32_to_cpu(cp_block->cp_pack_total_block_count));
734                 goto invalid_cp;
735         }
736         pre_version = *version;
737
738         cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
739         err = get_checkpoint_version(sbi, cp_addr, &cp_block,
740                                         &cp_page_2, version);
741         if (err)
742                 goto invalid_cp;
743         cur_version = *version;
744
745         if (cur_version == pre_version) {
746                 *version = cur_version;
747                 f2fs_put_page(cp_page_2, 1);
748                 return cp_page_1;
749         }
750         f2fs_put_page(cp_page_2, 1);
751 invalid_cp:
752         f2fs_put_page(cp_page_1, 1);
753         return NULL;
754 }
755
756 int get_valid_checkpoint(struct f2fs_sb_info *sbi)
757 {
758         struct f2fs_checkpoint *cp_block;
759         struct f2fs_super_block *fsb = sbi->raw_super;
760         struct page *cp1, *cp2, *cur_page;
761         unsigned long blk_size = sbi->blocksize;
762         unsigned long long cp1_version = 0, cp2_version = 0;
763         unsigned long long cp_start_blk_no;
764         unsigned int cp_blks = 1 + __cp_payload(sbi);
765         block_t cp_blk_no;
766         int i;
767
768         sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
769         if (!sbi->ckpt)
770                 return -ENOMEM;
771         /*
772          * Finding the valid cp block involves reading both
773          * sets (cp pack 1 and cp pack 2)
774          */
775         cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
776         cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
777
778         /* The second checkpoint pack should start at the next segment */
779         cp_start_blk_no += ((unsigned long long)1) <<
780                                 le32_to_cpu(fsb->log_blocks_per_seg);
781         cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
782
783         if (cp1 && cp2) {
784                 if (ver_after(cp2_version, cp1_version))
785                         cur_page = cp2;
786                 else
787                         cur_page = cp1;
788         } else if (cp1) {
789                 cur_page = cp1;
790         } else if (cp2) {
791                 cur_page = cp2;
792         } else {
793                 goto fail_no_cp;
794         }
795
796         cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
797         memcpy(sbi->ckpt, cp_block, blk_size);
798
799         if (cur_page == cp1)
800                 sbi->cur_cp_pack = 1;
801         else
802                 sbi->cur_cp_pack = 2;
803
804         /* Sanity checking of checkpoint */
805         if (sanity_check_ckpt(sbi))
806                 goto free_fail_no_cp;
807
808         if (cp_blks <= 1)
809                 goto done;
810
811         cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
812         if (cur_page == cp2)
813                 cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
814
815         for (i = 1; i < cp_blks; i++) {
816                 void *sit_bitmap_ptr;
817                 unsigned char *ckpt = (unsigned char *)sbi->ckpt;
818
819                 cur_page = get_meta_page(sbi, cp_blk_no + i);
820                 sit_bitmap_ptr = page_address(cur_page);
821                 memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
822                 f2fs_put_page(cur_page, 1);
823         }
824 done:
825         f2fs_put_page(cp1, 1);
826         f2fs_put_page(cp2, 1);
827         return 0;
828
829 free_fail_no_cp:
830         f2fs_put_page(cp1, 1);
831         f2fs_put_page(cp2, 1);
832 fail_no_cp:
833         kfree(sbi->ckpt);
834         return -EINVAL;
835 }
836
837 static void __add_dirty_inode(struct inode *inode, enum inode_type type)
838 {
839         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
840         int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
841
842         if (is_inode_flag_set(inode, flag))
843                 return;
844
845         set_inode_flag(inode, flag);
846         list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
847         stat_inc_dirty_inode(sbi, type);
848 }
849
850 static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
851 {
852         int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
853
854         if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
855                 return;
856
857         list_del_init(&F2FS_I(inode)->dirty_list);
858         clear_inode_flag(inode, flag);
859         stat_dec_dirty_inode(F2FS_I_SB(inode), type);
860 }
861
862 void update_dirty_page(struct inode *inode, struct page *page)
863 {
864         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
865         enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
866
867         if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
868                         !S_ISLNK(inode->i_mode))
869                 return;
870
871         spin_lock(&sbi->inode_lock[type]);
872         if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
873                 __add_dirty_inode(inode, type);
874         inode_inc_dirty_pages(inode);
875         spin_unlock(&sbi->inode_lock[type]);
876
877         SetPagePrivate(page);
878         f2fs_trace_pid(page);
879 }
880
881 void remove_dirty_inode(struct inode *inode)
882 {
883         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
884         enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
885
886         if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
887                         !S_ISLNK(inode->i_mode))
888                 return;
889
890         if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
891                 return;
892
893         spin_lock(&sbi->inode_lock[type]);
894         __remove_dirty_inode(inode, type);
895         spin_unlock(&sbi->inode_lock[type]);
896 }
897
898 int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
899 {
900         struct list_head *head;
901         struct inode *inode;
902         struct f2fs_inode_info *fi;
903         bool is_dir = (type == DIR_INODE);
904
905         trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
906                                 get_pages(sbi, is_dir ?
907                                 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
908 retry:
909         if (unlikely(f2fs_cp_error(sbi))) {
910                 trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
911                                 get_pages(sbi, is_dir ?
912                                 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
913                 return -EIO;
914         }
915
916         spin_lock(&sbi->inode_lock[type]);
917
918         head = &sbi->inode_list[type];
919         if (list_empty(head)) {
920                 spin_unlock(&sbi->inode_lock[type]);
921                 trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
922                                 get_pages(sbi, is_dir ?
923                                 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
924                 return 0;
925         }
926         fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
927         inode = igrab(&fi->vfs_inode);
928         spin_unlock(&sbi->inode_lock[type]);
929         if (inode) {
930                 filemap_fdatawrite(inode->i_mapping);
931                 iput(inode);
932         } else {
933                 /*
934                  * We should submit the bio, since several dentry pages of
935                  * the freeing inode may still be under writeback.
936                  */
937                 f2fs_submit_merged_bio(sbi, DATA, WRITE);
938                 cond_resched();
939         }
940         goto retry;
941 }
942
943 int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
944 {
945         struct list_head *head = &sbi->inode_list[DIRTY_META];
946         struct inode *inode;
947         struct f2fs_inode_info *fi;
948         s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);
949
950         while (total--) {
951                 if (unlikely(f2fs_cp_error(sbi)))
952                         return -EIO;
953
954                 spin_lock(&sbi->inode_lock[DIRTY_META]);
955                 if (list_empty(head)) {
956                         spin_unlock(&sbi->inode_lock[DIRTY_META]);
957                         return 0;
958                 }
959                 fi = list_entry(head->next, struct f2fs_inode_info,
960                                                         gdirty_list);
961                 inode = igrab(&fi->vfs_inode);
962                 spin_unlock(&sbi->inode_lock[DIRTY_META]);
963                 if (inode) {
964                         update_inode_page(inode);
965                         iput(inode);
966                 }
967         };
968         return 0;
969 }
970
971 /*
972  * Freeze all the FS-operations for checkpoint.
973  */
974 static int block_operations(struct f2fs_sb_info *sbi)
975 {
976         struct writeback_control wbc = {
977                 .sync_mode = WB_SYNC_ALL,
978                 .nr_to_write = LONG_MAX,
979                 .for_reclaim = 0,
980         };
981         struct blk_plug plug;
982         int err = 0;
983
984         blk_start_plug(&plug);
985
986 retry_flush_dents:
987         f2fs_lock_all(sbi);
988         /* write all the dirty dentry pages */
989         if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
990                 f2fs_unlock_all(sbi);
991                 err = sync_dirty_inodes(sbi, DIR_INODE);
992                 if (err)
993                         goto out;
994                 goto retry_flush_dents;
995         }
996
997         if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
998                 f2fs_unlock_all(sbi);
999                 err = f2fs_sync_inode_meta(sbi);
1000                 if (err)
1001                         goto out;
1002                 goto retry_flush_dents;
1003         }
1004
1005         /*
1006          * POR: we should ensure that there are no dirty node pages
1007          * until finishing nat/sit flush.
1008          */
1009 retry_flush_nodes:
1010         down_write(&sbi->node_write);
1011
1012         if (get_pages(sbi, F2FS_DIRTY_NODES)) {
1013                 up_write(&sbi->node_write);
1014                 err = sync_node_pages(sbi, &wbc);
1015                 if (err) {
1016                         f2fs_unlock_all(sbi);
1017                         goto out;
1018                 }
1019                 goto retry_flush_nodes;
1020         }
1021 out:
1022         blk_finish_plug(&plug);
1023         return err;
1024 }
1025
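/* release the locks taken in block_operations() and refill free nids */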
1026 static void unblock_operations(struct f2fs_sb_info *sbi)
1027 {
1028         up_write(&sbi->node_write);
1029
1030         build_free_nids(sbi);
1031         f2fs_unlock_all(sbi);
1032 }
1033
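/* wait until every writeback bio counted in nr_wb_bios has completed */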
1034 static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
1035 {
1036         DEFINE_WAIT(wait);
1037
1038         for (;;) {
1039                 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
1040
1041                 if (!atomic_read(&sbi->nr_wb_bios))
1042                         break;
1043
1044                 io_schedule_timeout(5*HZ);
1045         }
1046         finish_wait(&sbi->cp_wait, &wait);
1047 }
1048
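/* set or clear the checkpoint flags according to the checkpoint reason */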
1049 static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1050 {
1051         unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
1052         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1053
1054         spin_lock(&sbi->cp_lock);
1055
1056         if (cpc->reason == CP_UMOUNT)
1057                 __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1058         else
1059                 __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1060
1061         if (cpc->reason == CP_FASTBOOT)
1062                 __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
1063         else
1064                 __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
1065
1066         if (orphan_num)
1067                 __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
1068         else
1069                 __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
1070
1071         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1072                 __set_ckpt_flags(ckpt, CP_FSCK_FLAG);
1073
1074         /* set this flag to activate crc|cp_ver for recovery */
1075         __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
1076
1077         spin_unlock(&sbi->cp_lock);
1078 }
1079
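/*
 * Fill in the checkpoint block and write out the whole checkpoint pack:
 * cp block, orphan blocks, data/node summaries and the trailing cp block.
 */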
1080 static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1081 {
1082         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1083         struct f2fs_nm_info *nm_i = NM_I(sbi);
1084         unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
1085         nid_t last_nid = nm_i->next_scan_nid;
1086         block_t start_blk;
1087         unsigned int data_sum_blocks, orphan_blocks;
1088         __u32 crc32 = 0;
1089         int i;
1090         int cp_payload_blks = __cp_payload(sbi);
1091         struct super_block *sb = sbi->sb;
1092         struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
1093         u64 kbytes_written;
1094
1095         /* Flush all the NAT/SIT pages */
1096         while (get_pages(sbi, F2FS_DIRTY_META)) {
1097                 sync_meta_pages(sbi, META, LONG_MAX);
1098                 if (unlikely(f2fs_cp_error(sbi)))
1099                         return -EIO;
1100         }
1101
1102         next_free_nid(sbi, &last_nid);
1103
1104         /*
1105          * modify checkpoint
1106          * version number is already updated
1107          */
1108         ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
1109         ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
1110         ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
1111         for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
1112                 ckpt->cur_node_segno[i] =
1113                         cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
1114                 ckpt->cur_node_blkoff[i] =
1115                         cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
1116                 ckpt->alloc_type[i + CURSEG_HOT_NODE] =
1117                                 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
1118         }
1119         for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
1120                 ckpt->cur_data_segno[i] =
1121                         cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
1122                 ckpt->cur_data_blkoff[i] =
1123                         cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
1124                 ckpt->alloc_type[i + CURSEG_HOT_DATA] =
1125                                 curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
1126         }
1127
1128         ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
1129         ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
1130         ckpt->next_free_nid = cpu_to_le32(last_nid);
1131
1132         /* 2 cp  + n data seg summary + orphan inode blocks */
1133         data_sum_blocks = npages_for_summary_flush(sbi, false);
1134         spin_lock(&sbi->cp_lock);
1135         if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
1136                 __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
1137         else
1138                 __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
1139         spin_unlock(&sbi->cp_lock);
1140
1141         orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
1142         ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
1143                         orphan_blocks);
1144
1145         if (__remain_node_summaries(cpc->reason))
1146                 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
1147                                 cp_payload_blks + data_sum_blocks +
1148                                 orphan_blocks + NR_CURSEG_NODE_TYPE);
1149         else
1150                 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
1151                                 cp_payload_blks + data_sum_blocks +
1152                                 orphan_blocks);
1153
1154         /* update ckpt flag for checkpoint */
1155         update_ckpt_flags(sbi, cpc);
1156
1157         /* update SIT/NAT bitmap */
1158         get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
1159         get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
1160
1161         crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
1162         *((__le32 *)((unsigned char *)ckpt +
1163                                 le32_to_cpu(ckpt->checksum_offset)))
1164                                 = cpu_to_le32(crc32);
1165
1166         start_blk = __start_cp_next_addr(sbi);
1167
1168         /* need to wait for end_io results */
1169         wait_on_all_pages_writeback(sbi);
1170         if (unlikely(f2fs_cp_error(sbi)))
1171                 return -EIO;
1172
1173         /* write out checkpoint buffer at block 0 */
1174         update_meta_page(sbi, ckpt, start_blk++);
1175
1176         for (i = 1; i < 1 + cp_payload_blks; i++)
1177                 update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
1178                                                         start_blk++);
1179
1180         if (orphan_num) {
1181                 write_orphan_inodes(sbi, start_blk);
1182                 start_blk += orphan_blocks;
1183         }
1184
1185         write_data_summaries(sbi, start_blk);
1186         start_blk += data_sum_blocks;
1187
1188         /* Record write statistics in the hot node summary */
1189         kbytes_written = sbi->kbytes_written;
1190         if (sb->s_bdev->bd_part)
1191                 kbytes_written += BD_PART_WRITTEN(sbi);
1192
1193         seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
1194
1195         if (__remain_node_summaries(cpc->reason)) {
1196                 write_node_summaries(sbi, start_blk);
1197                 start_blk += NR_CURSEG_NODE_TYPE;
1198         }
1199
1200         /* writeout checkpoint block */
1201         update_meta_page(sbi, ckpt, start_blk);
1202
1203         /* wait for previously submitted node/meta pages writeback */
1204         wait_on_all_pages_writeback(sbi);
1205
1206         if (unlikely(f2fs_cp_error(sbi)))
1207                 return -EIO;
1208
1209         filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
1210         filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
1211
1212         /* update user_block_counts */
1213         sbi->last_valid_block_count = sbi->total_valid_block_count;
1214         percpu_counter_set(&sbi->alloc_valid_block_count, 0);
1215
1216         /* here, we have only one bio carrying the CP pack */
1217         sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
1218
1219         /* wait for previously submitted meta pages writeback */
1220         wait_on_all_pages_writeback(sbi);
1221
1222         release_ino_entry(sbi, false);
1223
1224         if (unlikely(f2fs_cp_error(sbi)))
1225                 return -EIO;
1226
1227         clear_prefree_segments(sbi, cpc);
1228         clear_sbi_flag(sbi, SBI_IS_DIRTY);
1229         clear_sbi_flag(sbi, SBI_NEED_CP);
1230         __set_cp_next_pack(sbi);
1231
1232         /*
1233          * redirty superblock if metadata like node page or inode cache is
1234          * updated during writing checkpoint.
1235          */
1236         if (get_pages(sbi, F2FS_DIRTY_NODES) ||
1237                         get_pages(sbi, F2FS_DIRTY_IMETA))
1238                 set_sbi_flag(sbi, SBI_IS_DIRTY);
1239
1240         f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
1241
1242         return 0;
1243 }
1244
1245 /*
1246  * We guarantee that this checkpoint procedure will not fail.
1247  */
1248 int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1249 {
1250         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1251         unsigned long long ckpt_ver;
1252         int err = 0;
1253
1254         mutex_lock(&sbi->cp_mutex);
1255
1256         if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1257                 (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
1258                 (cpc->reason == CP_DISCARD && !sbi->discard_blks)))
1259                 goto out;
1260         if (unlikely(f2fs_cp_error(sbi))) {
1261                 err = -EIO;
1262                 goto out;
1263         }
1264         if (f2fs_readonly(sbi->sb)) {
1265                 err = -EROFS;
1266                 goto out;
1267         }
1268
1269         trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1270
1271         err = block_operations(sbi);
1272         if (err)
1273                 goto out;
1274
1275         trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
1276
1277         f2fs_flush_merged_bios(sbi);
1278
1279         /* this is the case of multiple fstrims without any changes */
1280         if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
1281                 f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
1282                 f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
1283                 f2fs_bug_on(sbi, prefree_segments(sbi));
1284                 flush_sit_entries(sbi, cpc);
1285                 clear_prefree_segments(sbi, cpc);
1286                 f2fs_wait_all_discard_bio(sbi);
1287                 unblock_operations(sbi);
1288                 goto out;
1289         }
1290
1291         /*
1292          * update checkpoint pack index
1293          * Increase the version number so that
1294          * SIT entries and seg summaries are written at correct place
1295          */
1296         ckpt_ver = cur_cp_version(ckpt);
1297         ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
1298
1299         /* write cached NAT/SIT entries to NAT/SIT area */
1300         flush_nat_entries(sbi);
1301         flush_sit_entries(sbi, cpc);
1302
1303         /* unlock all the fs_lock[] in do_checkpoint() */
1304         err = do_checkpoint(sbi, cpc);
1305
1306         f2fs_wait_all_discard_bio(sbi);
1307
1308         unblock_operations(sbi);
1309         stat_inc_cp_count(sbi->stat_info);
1310
1311         if (cpc->reason == CP_RECOVERY)
1312                 f2fs_msg(sbi->sb, KERN_NOTICE,
1313                         "checkpoint: version = %llx", ckpt_ver);
1314
1315         /* do checkpoint periodically */
1316         f2fs_update_time(sbi, CP_TIME);
1317         trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
1318 out:
1319         mutex_unlock(&sbi->cp_mutex);
1320         return err;
1321 }
1322
1323 void init_ino_entry_info(struct f2fs_sb_info *sbi)
1324 {
1325         int i;
1326
1327         for (i = 0; i < MAX_INO_ENTRY; i++) {
1328                 struct inode_management *im = &sbi->im[i];
1329
1330                 INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
1331                 spin_lock_init(&im->ino_lock);
1332                 INIT_LIST_HEAD(&im->ino_list);
1333                 im->ino_num = 0;
1334         }
1335
1336         sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1337                         NR_CURSEG_TYPE - __cp_payload(sbi)) *
1338                                 F2FS_ORPHANS_PER_BLOCK;
1339 }
1340
1341 int __init create_checkpoint_caches(void)
1342 {
1343         ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
1344                         sizeof(struct ino_entry));
1345         if (!ino_entry_slab)
1346                 return -ENOMEM;
1347         inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
1348                         sizeof(struct inode_entry));
1349         if (!inode_entry_slab) {
1350                 kmem_cache_destroy(ino_entry_slab);
1351                 return -ENOMEM;
1352         }
1353         return 0;
1354 }
1355
1356 void destroy_checkpoint_caches(void)
1357 {
1358         kmem_cache_destroy(ino_entry_slab);
1359         kmem_cache_destroy(inode_entry_slab);
1360 }