GNU Linux-libre 4.14.290-gnu1
1 /*
2  * fs/f2fs/node.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/mpage.h>
14 #include <linux/backing-dev.h>
15 #include <linux/blkdev.h>
16 #include <linux/pagevec.h>
17 #include <linux/swap.h>
18
19 #include "f2fs.h"
20 #include "node.h"
21 #include "segment.h"
22 #include "xattr.h"
23 #include "trace.h"
24 #include <trace/events/f2fs.h>
25
26 #define on_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock)
27
28 static struct kmem_cache *nat_entry_slab;
29 static struct kmem_cache *free_nid_slab;
30 static struct kmem_cache *nat_entry_set_slab;
31
32 /*
33  * Check whether the given nid is within node id range.
34  */
35 int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
36 {
37         if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
38                 set_sbi_flag(sbi, SBI_NEED_FSCK);
39                 f2fs_msg(sbi->sb, KERN_WARNING,
40                                 "%s: out-of-range nid=%x, run fsck to fix.",
41                                 __func__, nid);
42                 return -EFSCORRUPTED;
43         }
44         return 0;
45 }
46
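/*
 * Check whether the in-memory footprint of the given cache type still fits
 * within its share of available low memory (scaled by ram_thresh), so the
 * caller can decide whether to keep building cached entries.
 */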
47 bool available_free_memory(struct f2fs_sb_info *sbi, int type)
48 {
49         struct f2fs_nm_info *nm_i = NM_I(sbi);
50         struct sysinfo val;
51         unsigned long avail_ram;
52         unsigned long mem_size = 0;
53         bool res = false;
54
55         si_meminfo(&val);
56
57         /* only uses low memory */
58         avail_ram = val.totalram - val.totalhigh;
59
60         /*
61          * give 25%, 25%, 50%, 50%, 50% of memory to each component, respectively
62          */
63         if (type == FREE_NIDS) {
64                 mem_size = (nm_i->nid_cnt[FREE_NID_LIST] *
65                                 sizeof(struct free_nid)) >> PAGE_SHIFT;
66                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
67         } else if (type == NAT_ENTRIES) {
68                 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
69                                                         PAGE_SHIFT;
70                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
71                 if (excess_cached_nats(sbi))
72                         res = false;
73         } else if (type == DIRTY_DENTS) {
74                 if (sbi->sb->s_bdi->wb.dirty_exceeded)
75                         return false;
76                 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
77                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
78         } else if (type == INO_ENTRIES) {
79                 int i;
80
81                 for (i = 0; i <= UPDATE_INO; i++)
82                         mem_size += sbi->im[i].ino_num *
83                                                 sizeof(struct ino_entry);
84                 mem_size >>= PAGE_SHIFT;
85                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
86         } else if (type == EXTENT_CACHE) {
87                 mem_size = (atomic_read(&sbi->total_ext_tree) *
88                                 sizeof(struct extent_tree) +
89                                 atomic_read(&sbi->total_ext_node) *
90                                 sizeof(struct extent_node)) >> PAGE_SHIFT;
91                 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
92         } else {
93                 if (!sbi->sb->s_bdi->wb.dirty_exceeded)
94                         return true;
95         }
96         return res;
97 }
98
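/* Clear the dirty state of a node page and drop its dirty-node accounting. */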
99 static void clear_node_page_dirty(struct page *page)
100 {
101         struct address_space *mapping = page->mapping;
102         unsigned long flags;
103
104         if (PageDirty(page)) {
105                 spin_lock_irqsave(&mapping->tree_lock, flags);
106                 radix_tree_tag_clear(&mapping->page_tree,
107                                 page_index(page),
108                                 PAGECACHE_TAG_DIRTY);
109                 spin_unlock_irqrestore(&mapping->tree_lock, flags);
110
111                 clear_page_dirty_for_io(page);
112                 dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
113         }
114         ClearPageUptodate(page);
115 }
116
117 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
118 {
119         pgoff_t index = current_nat_addr(sbi, nid);
120         return get_meta_page(sbi, index);
121 }
122
123 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
124 {
125         struct page *src_page;
126         struct page *dst_page;
127         pgoff_t src_off;
128         pgoff_t dst_off;
129         void *src_addr;
130         void *dst_addr;
131         struct f2fs_nm_info *nm_i = NM_I(sbi);
132
133         src_off = current_nat_addr(sbi, nid);
134         dst_off = next_nat_addr(sbi, src_off);
135
136         /* get current nat block page with lock */
137         src_page = get_meta_page(sbi, src_off);
138         dst_page = grab_meta_page(sbi, dst_off);
139         f2fs_bug_on(sbi, PageDirty(src_page));
140
141         src_addr = page_address(src_page);
142         dst_addr = page_address(dst_page);
143         memcpy(dst_addr, src_addr, PAGE_SIZE);
144         set_page_dirty(dst_page);
145         f2fs_put_page(src_page, 1);
146
147         set_to_next_nat(nm_i, nid);
148
149         return dst_page;
150 }
151
152 static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
153 {
154         return radix_tree_lookup(&nm_i->nat_root, n);
155 }
156
157 static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
158                 nid_t start, unsigned int nr, struct nat_entry **ep)
159 {
160         return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
161 }
162
163 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
164 {
165         list_del(&e->list);
166         radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
167         nm_i->nat_cnt--;
168         kmem_cache_free(nat_entry_slab, e);
169 }
170
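/*
 * Mark a nat entry dirty and account it in its per-NAT-block set, allocating
 * the set head on first use; entries whose block address is NEW_ADDR are kept
 * off the set's entry list.
 */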
171 static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
172                                                 struct nat_entry *ne)
173 {
174         nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
175         struct nat_entry_set *head;
176
177         head = radix_tree_lookup(&nm_i->nat_set_root, set);
178         if (!head) {
179                 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
180
181                 INIT_LIST_HEAD(&head->entry_list);
182                 INIT_LIST_HEAD(&head->set_list);
183                 head->set = set;
184                 head->entry_cnt = 0;
185                 f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
186         }
187
188         if (get_nat_flag(ne, IS_DIRTY))
189                 goto refresh_list;
190
191         nm_i->dirty_nat_cnt++;
192         head->entry_cnt++;
193         set_nat_flag(ne, IS_DIRTY, true);
194 refresh_list:
195         if (nat_get_blkaddr(ne) == NEW_ADDR)
196                 list_del_init(&ne->list);
197         else
198                 list_move_tail(&ne->list, &head->entry_list);
199 }
200
201 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
202                 struct nat_entry_set *set, struct nat_entry *ne)
203 {
204         list_move_tail(&ne->list, &nm_i->nat_entries);
205         set_nat_flag(ne, IS_DIRTY, false);
206         set->entry_cnt--;
207         nm_i->dirty_nat_cnt--;
208 }
209
210 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
211                 nid_t start, unsigned int nr, struct nat_entry_set **ep)
212 {
213         return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
214                                                         start, nr);
215 }
216
217 int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
218 {
219         struct f2fs_nm_info *nm_i = NM_I(sbi);
220         struct nat_entry *e;
221         bool need = false;
222
223         down_read(&nm_i->nat_tree_lock);
224         e = __lookup_nat_cache(nm_i, nid);
225         if (e) {
226                 if (!get_nat_flag(e, IS_CHECKPOINTED) &&
227                                 !get_nat_flag(e, HAS_FSYNCED_INODE))
228                         need = true;
229         }
230         up_read(&nm_i->nat_tree_lock);
231         return need;
232 }
233
234 bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
235 {
236         struct f2fs_nm_info *nm_i = NM_I(sbi);
237         struct nat_entry *e;
238         bool is_cp = true;
239
240         down_read(&nm_i->nat_tree_lock);
241         e = __lookup_nat_cache(nm_i, nid);
242         if (e && !get_nat_flag(e, IS_CHECKPOINTED))
243                 is_cp = false;
244         up_read(&nm_i->nat_tree_lock);
245         return is_cp;
246 }
247
248 bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
249 {
250         struct f2fs_nm_info *nm_i = NM_I(sbi);
251         struct nat_entry *e;
252         bool need_update = true;
253
254         down_read(&nm_i->nat_tree_lock);
255         e = __lookup_nat_cache(nm_i, ino);
256         if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
257                         (get_nat_flag(e, IS_CHECKPOINTED) ||
258                          get_nat_flag(e, HAS_FSYNCED_INODE)))
259                 need_update = false;
260         up_read(&nm_i->nat_tree_lock);
261         return need_update;
262 }
263
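/*
 * Allocate a nat cache entry for @nid and insert it into the nat radix tree
 * and LRU list.  With @no_fail the allocation cannot fail; otherwise NULL is
 * returned on allocation or insertion failure.
 */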
264 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
265                                                                 bool no_fail)
266 {
267         struct nat_entry *new;
268
269         if (no_fail) {
270                 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
271                 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
272         } else {
273                 new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
274                 if (!new)
275                         return NULL;
276                 if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
277                         kmem_cache_free(nat_entry_slab, new);
278                         return NULL;
279                 }
280         }
281
282         memset(new, 0, sizeof(struct nat_entry));
283         nat_set_nid(new, nid);
284         nat_reset_flag(new);
285         list_add_tail(&new->list, &nm_i->nat_entries);
286         nm_i->nat_cnt++;
287         return new;
288 }
289
290 static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
291                                                 struct f2fs_nat_entry *ne)
292 {
293         struct f2fs_nm_info *nm_i = NM_I(sbi);
294         struct nat_entry *e;
295
296         e = __lookup_nat_cache(nm_i, nid);
297         if (!e) {
298                 e = grab_nat_entry(nm_i, nid, false);
299                 if (e)
300                         node_info_from_raw_nat(&e->ni, ne);
301         } else {
302                 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
303                                 nat_get_blkaddr(e) !=
304                                         le32_to_cpu(ne->block_addr) ||
305                                 nat_get_version(e) != ne->version);
306         }
307 }
308
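/*
 * Record the new block address of a node in the nat cache and mark the entry
 * dirty; the fsync state is also propagated to the owning inode's entry.
 */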
309 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
310                         block_t new_blkaddr, bool fsync_done)
311 {
312         struct f2fs_nm_info *nm_i = NM_I(sbi);
313         struct nat_entry *e;
314
315         down_write(&nm_i->nat_tree_lock);
316         e = __lookup_nat_cache(nm_i, ni->nid);
317         if (!e) {
318                 e = grab_nat_entry(nm_i, ni->nid, true);
319                 copy_node_info(&e->ni, ni);
320                 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
321         } else if (new_blkaddr == NEW_ADDR) {
322                 /*
323                  * when a nid is reallocated, the previous nat entry
324                  * can remain in the nat cache, so reinitialize it
325                  * with the new information.
326                  */
327                 copy_node_info(&e->ni, ni);
328                 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
329         }
330
331         /* sanity check */
332         f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
333         f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
334                         new_blkaddr == NULL_ADDR);
335         f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
336                         new_blkaddr == NEW_ADDR);
337         f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
338                         new_blkaddr == NEW_ADDR);
339
340         /* increment version no as node is removed */
341         if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
342                 unsigned char version = nat_get_version(e);
343                 nat_set_version(e, inc_node_version(version));
344
345                 /* in order to reuse the nid */
346                 if (nm_i->next_scan_nid > ni->nid)
347                         nm_i->next_scan_nid = ni->nid;
348         }
349
350         /* change address */
351         nat_set_blkaddr(e, new_blkaddr);
352         if (!is_valid_data_blkaddr(sbi, new_blkaddr))
353                 set_nat_flag(e, IS_CHECKPOINTED, false);
354         __set_nat_cache_dirty(nm_i, e);
355
356         /* update fsync_mark if its inode nat entry is still alive */
357         if (ni->nid != ni->ino)
358                 e = __lookup_nat_cache(nm_i, ni->ino);
359         if (e) {
360                 if (fsync_done && ni->nid == ni->ino)
361                         set_nat_flag(e, HAS_FSYNCED_INODE, true);
362                 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
363         }
364         up_write(&nm_i->nat_tree_lock);
365 }
366
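/* Drop up to @nr_shrink clean nat cache entries and return the number freed. */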
367 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
368 {
369         struct f2fs_nm_info *nm_i = NM_I(sbi);
370         int nr = nr_shrink;
371
372         if (!down_write_trylock(&nm_i->nat_tree_lock))
373                 return 0;
374
375         while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
376                 struct nat_entry *ne;
377                 ne = list_first_entry(&nm_i->nat_entries,
378                                         struct nat_entry, list);
379                 __del_from_nat_cache(nm_i, ne);
380                 nr_shrink--;
381         }
382         up_write(&nm_i->nat_tree_lock);
383         return nr - nr_shrink;
384 }
385
386 /*
387  * This function always returns success
388  */
389 void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
390 {
391         struct f2fs_nm_info *nm_i = NM_I(sbi);
392         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
393         struct f2fs_journal *journal = curseg->journal;
394         nid_t start_nid = START_NID(nid);
395         struct f2fs_nat_block *nat_blk;
396         struct page *page = NULL;
397         struct f2fs_nat_entry ne;
398         struct nat_entry *e;
399         pgoff_t index;
400         int i;
401
402         ni->nid = nid;
403
404         /* Check nat cache */
405         down_read(&nm_i->nat_tree_lock);
406         e = __lookup_nat_cache(nm_i, nid);
407         if (e) {
408                 ni->ino = nat_get_ino(e);
409                 ni->blk_addr = nat_get_blkaddr(e);
410                 ni->version = nat_get_version(e);
411                 up_read(&nm_i->nat_tree_lock);
412                 return;
413         }
414
415         memset(&ne, 0, sizeof(struct f2fs_nat_entry));
416
417         /* Check current segment summary */
418         down_read(&curseg->journal_rwsem);
419         i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
420         if (i >= 0) {
421                 ne = nat_in_journal(journal, i);
422                 node_info_from_raw_nat(ni, &ne);
423         }
424         up_read(&curseg->journal_rwsem);
425         if (i >= 0) {
426                 up_read(&nm_i->nat_tree_lock);
427                 goto cache;
428         }
429
430         /* Fill node_info from nat page */
431         index = current_nat_addr(sbi, nid);
432         up_read(&nm_i->nat_tree_lock);
433
434         page = get_meta_page(sbi, index);
435         nat_blk = (struct f2fs_nat_block *)page_address(page);
436         ne = nat_blk->entries[nid - start_nid];
437         node_info_from_raw_nat(ni, &ne);
438         f2fs_put_page(page, 1);
439 cache:
440         /* cache nat entry */
441         down_write(&nm_i->nat_tree_lock);
442         cache_nat_entry(sbi, nid, &ne);
443         up_write(&nm_i->nat_tree_lock);
444 }
445
446 /*
447  * readahead up to @n sibling node pages of the parent node.
448  */
449 static void ra_node_pages(struct page *parent, int start, int n)
450 {
451         struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
452         struct blk_plug plug;
453         int i, end;
454         nid_t nid;
455
456         blk_start_plug(&plug);
457
458         /* try readahead for siblings of the desired node */
459         end = start + n;
460         end = min(end, NIDS_PER_BLOCK);
461         for (i = start; i < end; i++) {
462                 nid = get_nid(parent, i, false);
463                 ra_node_page(sbi, nid);
464         }
465
466         blk_finish_plug(&plug);
467 }
468
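/*
 * Given the level at which a lookup stopped (dn->cur_level/max_level), return
 * the file offset where the next node block at that level begins, so a hole
 * can be skipped in one step.
 */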
469 pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
470 {
471         const long direct_index = ADDRS_PER_INODE(dn->inode);
472         const long direct_blks = ADDRS_PER_BLOCK;
473         const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
474         unsigned int skipped_unit = ADDRS_PER_BLOCK;
475         int cur_level = dn->cur_level;
476         int max_level = dn->max_level;
477         pgoff_t base = 0;
478
479         if (!dn->max_level)
480                 return pgofs + 1;
481
482         while (max_level-- > cur_level)
483                 skipped_unit *= NIDS_PER_BLOCK;
484
485         switch (dn->max_level) {
486         case 3:
487                 base += 2 * indirect_blks;
488         case 2:
489                 base += 2 * direct_blks;
490         case 1:
491                 base += direct_index;
492                 break;
493         default:
494                 f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
495         }
496
497         return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
498 }
499
500 /*
501  * The maximum depth is four.
502  * Offset[0] will have raw inode offset.
503  */
504 static int get_node_path(struct inode *inode, long block,
505                                 int offset[4], unsigned int noffset[4])
506 {
507         const long direct_index = ADDRS_PER_INODE(inode);
508         const long direct_blks = ADDRS_PER_BLOCK;
509         const long dptrs_per_blk = NIDS_PER_BLOCK;
510         const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
511         const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
512         int n = 0;
513         int level = 0;
514
515         noffset[0] = 0;
516
517         if (block < direct_index) {
518                 offset[n] = block;
519                 goto got;
520         }
521         block -= direct_index;
522         if (block < direct_blks) {
523                 offset[n++] = NODE_DIR1_BLOCK;
524                 noffset[n] = 1;
525                 offset[n] = block;
526                 level = 1;
527                 goto got;
528         }
529         block -= direct_blks;
530         if (block < direct_blks) {
531                 offset[n++] = NODE_DIR2_BLOCK;
532                 noffset[n] = 2;
533                 offset[n] = block;
534                 level = 1;
535                 goto got;
536         }
537         block -= direct_blks;
538         if (block < indirect_blks) {
539                 offset[n++] = NODE_IND1_BLOCK;
540                 noffset[n] = 3;
541                 offset[n++] = block / direct_blks;
542                 noffset[n] = 4 + offset[n - 1];
543                 offset[n] = block % direct_blks;
544                 level = 2;
545                 goto got;
546         }
547         block -= indirect_blks;
548         if (block < indirect_blks) {
549                 offset[n++] = NODE_IND2_BLOCK;
550                 noffset[n] = 4 + dptrs_per_blk;
551                 offset[n++] = block / direct_blks;
552                 noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
553                 offset[n] = block % direct_blks;
554                 level = 2;
555                 goto got;
556         }
557         block -= indirect_blks;
558         if (block < dindirect_blks) {
559                 offset[n++] = NODE_DIND_BLOCK;
560                 noffset[n] = 5 + (dptrs_per_blk * 2);
561                 offset[n++] = block / indirect_blks;
562                 noffset[n] = 6 + (dptrs_per_blk * 2) +
563                               offset[n - 1] * (dptrs_per_blk + 1);
564                 offset[n++] = (block / direct_blks) % dptrs_per_blk;
565                 noffset[n] = 7 + (dptrs_per_blk * 2) +
566                               offset[n - 2] * (dptrs_per_blk + 1) +
567                               offset[n - 1];
568                 offset[n] = block % direct_blks;
569                 level = 3;
570                 goto got;
571         } else {
572                 return -E2BIG;
573         }
574 got:
575         return level;
576 }
577
578 /*
579  * Caller should call f2fs_put_dnode(dn).
580  * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
581  * f2fs_unlock_op() only if mode is not set to RDONLY_NODE.
582  * In the case of RDONLY_NODE, we don't need to care about the mutex.
583  */
584 int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
585 {
586         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
587         struct page *npage[4];
588         struct page *parent = NULL;
589         int offset[4];
590         unsigned int noffset[4];
591         nid_t nids[4];
592         int level, i = 0;
593         int err = 0;
594
595         level = get_node_path(dn->inode, index, offset, noffset);
596         if (level < 0)
597                 return level;
598
599         nids[0] = dn->inode->i_ino;
600         npage[0] = dn->inode_page;
601
602         if (!npage[0]) {
603                 npage[0] = get_node_page(sbi, nids[0]);
604                 if (IS_ERR(npage[0]))
605                         return PTR_ERR(npage[0]);
606         }
607
608         /* if inline_data is set, should not report any block indices */
609         if (f2fs_has_inline_data(dn->inode) && index) {
610                 err = -ENOENT;
611                 f2fs_put_page(npage[0], 1);
612                 goto release_out;
613         }
614
615         parent = npage[0];
616         if (level != 0)
617                 nids[1] = get_nid(parent, offset[0], true);
618         dn->inode_page = npage[0];
619         dn->inode_page_locked = true;
620
621         /* get indirect or direct nodes */
622         for (i = 1; i <= level; i++) {
623                 bool done = false;
624
625                 if (!nids[i] && mode == ALLOC_NODE) {
626                         /* alloc new node */
627                         if (!alloc_nid(sbi, &(nids[i]))) {
628                                 err = -ENOSPC;
629                                 goto release_pages;
630                         }
631
632                         dn->nid = nids[i];
633                         npage[i] = new_node_page(dn, noffset[i]);
634                         if (IS_ERR(npage[i])) {
635                                 alloc_nid_failed(sbi, nids[i]);
636                                 err = PTR_ERR(npage[i]);
637                                 goto release_pages;
638                         }
639
640                         set_nid(parent, offset[i - 1], nids[i], i == 1);
641                         alloc_nid_done(sbi, nids[i]);
642                         done = true;
643                 } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
644                         npage[i] = get_node_page_ra(parent, offset[i - 1]);
645                         if (IS_ERR(npage[i])) {
646                                 err = PTR_ERR(npage[i]);
647                                 goto release_pages;
648                         }
649                         done = true;
650                 }
651                 if (i == 1) {
652                         dn->inode_page_locked = false;
653                         unlock_page(parent);
654                 } else {
655                         f2fs_put_page(parent, 1);
656                 }
657
658                 if (!done) {
659                         npage[i] = get_node_page(sbi, nids[i]);
660                         if (IS_ERR(npage[i])) {
661                                 err = PTR_ERR(npage[i]);
662                                 f2fs_put_page(npage[0], 0);
663                                 goto release_out;
664                         }
665                 }
666                 if (i < level) {
667                         parent = npage[i];
668                         nids[i + 1] = get_nid(parent, offset[i], false);
669                 }
670         }
671         dn->nid = nids[level];
672         dn->ofs_in_node = offset[level];
673         dn->node_page = npage[level];
674         dn->data_blkaddr = datablock_addr(dn->inode,
675                                 dn->node_page, dn->ofs_in_node);
676         return 0;
677
678 release_pages:
679         f2fs_put_page(parent, 1);
680         if (i > 1)
681                 f2fs_put_page(npage[0], 0);
682 release_out:
683         dn->inode_page = NULL;
684         dn->node_page = NULL;
685         if (err == -ENOENT) {
686                 dn->cur_level = i;
687                 dn->max_level = level;
688                 dn->ofs_in_node = offset[level];
689         }
690         return err;
691 }
692
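/*
 * Release the on-disk block of the node referenced by @dn, update node and
 * inode counters and the nat cache, and invalidate its page cache page.
 */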
693 static void truncate_node(struct dnode_of_data *dn)
694 {
695         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
696         struct node_info ni;
697         pgoff_t index;
698
699         get_node_info(sbi, dn->nid, &ni);
700         f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
701
702         /* Deallocate node address */
703         invalidate_blocks(sbi, ni.blk_addr);
704         dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
705         set_node_addr(sbi, &ni, NULL_ADDR, false);
706
707         if (dn->nid == dn->inode->i_ino) {
708                 remove_orphan_inode(sbi, dn->nid);
709                 dec_valid_inode_count(sbi);
710                 f2fs_inode_synced(dn->inode);
711         }
712
713         clear_node_page_dirty(dn->node_page);
714         set_sbi_flag(sbi, SBI_IS_DIRTY);
715
716         index = dn->node_page->index;
717         f2fs_put_page(dn->node_page, 1);
718
719         invalidate_mapping_pages(NODE_MAPPING(sbi),
720                         index, index);
721
722         dn->node_page = NULL;
723         trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
724 }
725
726 static int truncate_dnode(struct dnode_of_data *dn)
727 {
728         struct page *page;
729
730         if (dn->nid == 0)
731                 return 1;
732
733         /* get direct node */
734         page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
735         if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
736                 return 1;
737         else if (IS_ERR(page))
738                 return PTR_ERR(page);
739
740         /* Make dnode_of_data for parameter */
741         dn->node_page = page;
742         dn->ofs_in_node = 0;
743         truncate_data_blocks(dn);
744         truncate_node(dn);
745         return 1;
746 }
747
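/*
 * Recursively free the child node blocks referenced by an indirect or double
 * indirect node, starting at slot @ofs, and free the node itself when @ofs
 * is zero.
 */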
748 static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
749                                                 int ofs, int depth)
750 {
751         struct dnode_of_data rdn = *dn;
752         struct page *page;
753         struct f2fs_node *rn;
754         nid_t child_nid;
755         unsigned int child_nofs;
756         int freed = 0;
757         int i, ret;
758
759         if (dn->nid == 0)
760                 return NIDS_PER_BLOCK + 1;
761
762         trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
763
764         page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
765         if (IS_ERR(page)) {
766                 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
767                 return PTR_ERR(page);
768         }
769
770         ra_node_pages(page, ofs, NIDS_PER_BLOCK);
771
772         rn = F2FS_NODE(page);
773         if (depth < 3) {
774                 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
775                         child_nid = le32_to_cpu(rn->in.nid[i]);
776                         if (child_nid == 0)
777                                 continue;
778                         rdn.nid = child_nid;
779                         ret = truncate_dnode(&rdn);
780                         if (ret < 0)
781                                 goto out_err;
782                         if (set_nid(page, i, 0, false))
783                                 dn->node_changed = true;
784                 }
785         } else {
786                 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
787                 for (i = ofs; i < NIDS_PER_BLOCK; i++) {
788                         child_nid = le32_to_cpu(rn->in.nid[i]);
789                         if (child_nid == 0) {
790                                 child_nofs += NIDS_PER_BLOCK + 1;
791                                 continue;
792                         }
793                         rdn.nid = child_nid;
794                         ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
795                         if (ret == (NIDS_PER_BLOCK + 1)) {
796                                 if (set_nid(page, i, 0, false))
797                                         dn->node_changed = true;
798                                 child_nofs += ret;
799                         } else if (ret < 0 && ret != -ENOENT) {
800                                 goto out_err;
801                         }
802                 }
803                 freed = child_nofs;
804         }
805
806         if (!ofs) {
807                 /* remove current indirect node */
808                 dn->node_page = page;
809                 truncate_node(dn);
810                 freed++;
811         } else {
812                 f2fs_put_page(page, 1);
813         }
814         trace_f2fs_truncate_nodes_exit(dn->inode, freed);
815         return freed;
816
817 out_err:
818         f2fs_put_page(page, 1);
819         trace_f2fs_truncate_nodes_exit(dn->inode, ret);
820         return ret;
821 }
822
823 static int truncate_partial_nodes(struct dnode_of_data *dn,
824                         struct f2fs_inode *ri, int *offset, int depth)
825 {
826         struct page *pages[2];
827         nid_t nid[3];
828         nid_t child_nid;
829         int err = 0;
830         int i;
831         int idx = depth - 2;
832
833         nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
834         if (!nid[0])
835                 return 0;
836
837         /* get indirect nodes in the path */
838         for (i = 0; i < idx + 1; i++) {
839                 /* reference count will be increased */
840                 pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
841                 if (IS_ERR(pages[i])) {
842                         err = PTR_ERR(pages[i]);
843                         idx = i - 1;
844                         goto fail;
845                 }
846                 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
847         }
848
849         ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
850
851         /* free direct nodes linked to a partial indirect node */
852         for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
853                 child_nid = get_nid(pages[idx], i, false);
854                 if (!child_nid)
855                         continue;
856                 dn->nid = child_nid;
857                 err = truncate_dnode(dn);
858                 if (err < 0)
859                         goto fail;
860                 if (set_nid(pages[idx], i, 0, false))
861                         dn->node_changed = true;
862         }
863
864         if (offset[idx + 1] == 0) {
865                 dn->node_page = pages[idx];
866                 dn->nid = nid[idx];
867                 truncate_node(dn);
868         } else {
869                 f2fs_put_page(pages[idx], 1);
870         }
871         offset[idx]++;
872         offset[idx + 1] = 0;
873         idx--;
874 fail:
875         for (i = idx; i >= 0; i--)
876                 f2fs_put_page(pages[i], 1);
877
878         trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
879
880         return err;
881 }
882
883 /*
884  * All the block addresses of data and nodes should be nullified.
885  */
886 int truncate_inode_blocks(struct inode *inode, pgoff_t from)
887 {
888         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
889         int err = 0, cont = 1;
890         int level, offset[4], noffset[4];
891         unsigned int nofs = 0;
892         struct f2fs_inode *ri;
893         struct dnode_of_data dn;
894         struct page *page;
895
896         trace_f2fs_truncate_inode_blocks_enter(inode, from);
897
898         level = get_node_path(inode, from, offset, noffset);
899         if (level < 0)
900                 return level;
901
902         page = get_node_page(sbi, inode->i_ino);
903         if (IS_ERR(page)) {
904                 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
905                 return PTR_ERR(page);
906         }
907
908         set_new_dnode(&dn, inode, page, NULL, 0);
909         unlock_page(page);
910
911         ri = F2FS_INODE(page);
912         switch (level) {
913         case 0:
914         case 1:
915                 nofs = noffset[1];
916                 break;
917         case 2:
918                 nofs = noffset[1];
919                 if (!offset[level - 1])
920                         goto skip_partial;
921                 err = truncate_partial_nodes(&dn, ri, offset, level);
922                 if (err < 0 && err != -ENOENT)
923                         goto fail;
924                 nofs += 1 + NIDS_PER_BLOCK;
925                 break;
926         case 3:
927                 nofs = 5 + 2 * NIDS_PER_BLOCK;
928                 if (!offset[level - 1])
929                         goto skip_partial;
930                 err = truncate_partial_nodes(&dn, ri, offset, level);
931                 if (err < 0 && err != -ENOENT)
932                         goto fail;
933                 break;
934         default:
935                 BUG();
936         }
937
938 skip_partial:
939         while (cont) {
940                 dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
941                 switch (offset[0]) {
942                 case NODE_DIR1_BLOCK:
943                 case NODE_DIR2_BLOCK:
944                         err = truncate_dnode(&dn);
945                         break;
946
947                 case NODE_IND1_BLOCK:
948                 case NODE_IND2_BLOCK:
949                         err = truncate_nodes(&dn, nofs, offset[1], 2);
950                         break;
951
952                 case NODE_DIND_BLOCK:
953                         err = truncate_nodes(&dn, nofs, offset[1], 3);
954                         cont = 0;
955                         break;
956
957                 default:
958                         BUG();
959                 }
960                 if (err < 0 && err != -ENOENT)
961                         goto fail;
962                 if (offset[1] == 0 &&
963                                 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
964                         lock_page(page);
965                         BUG_ON(page->mapping != NODE_MAPPING(sbi));
966                         f2fs_wait_on_page_writeback(page, NODE, true);
967                         ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
968                         set_page_dirty(page);
969                         unlock_page(page);
970                 }
971                 offset[1] = 0;
972                 offset[0]++;
973                 nofs += err;
974         }
975 fail:
976         f2fs_put_page(page, 0);
977         trace_f2fs_truncate_inode_blocks_exit(inode, err);
978         return err > 0 ? 0 : err;
979 }
980
981 int truncate_xattr_node(struct inode *inode, struct page *page)
982 {
983         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
984         nid_t nid = F2FS_I(inode)->i_xattr_nid;
985         struct dnode_of_data dn;
986         struct page *npage;
987
988         if (!nid)
989                 return 0;
990
991         npage = get_node_page(sbi, nid);
992         if (IS_ERR(npage))
993                 return PTR_ERR(npage);
994
995         f2fs_i_xnid_write(inode, 0);
996
997         set_new_dnode(&dn, inode, page, npage, nid);
998
999         if (page)
1000                 dn.inode_page_locked = true;
1001         truncate_node(&dn);
1002         return 0;
1003 }
1004
1005 /*
1006  * Caller should grab and release a rwsem by calling f2fs_lock_op() and
1007  * f2fs_unlock_op().
1008  */
1009 int remove_inode_page(struct inode *inode)
1010 {
1011         struct dnode_of_data dn;
1012         int err;
1013
1014         set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
1015         err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
1016         if (err)
1017                 return err;
1018
1019         err = truncate_xattr_node(inode, dn.inode_page);
1020         if (err) {
1021                 f2fs_put_dnode(&dn);
1022                 return err;
1023         }
1024
1025         /* remove potential inline_data blocks */
1026         if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1027                                 S_ISLNK(inode->i_mode))
1028                 truncate_data_blocks_range(&dn, 1);
1029
1030         /* 0 is possible, after f2fs_new_inode() has failed */
1031         f2fs_bug_on(F2FS_I_SB(inode),
1032                         inode->i_blocks != 0 && inode->i_blocks != 8);
1033
1034         /* will put inode & node pages */
1035         truncate_node(&dn);
1036         return 0;
1037 }
1038
1039 struct page *new_inode_page(struct inode *inode)
1040 {
1041         struct dnode_of_data dn;
1042
1043         /* allocate inode page for new inode */
1044         set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
1045
1046         /* caller should f2fs_put_page(page, 1); */
1047         return new_node_page(&dn, 0);
1048 }
1049
1050 struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
1051 {
1052         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1053         struct node_info new_ni;
1054         struct page *page;
1055         int err;
1056
1057         if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1058                 return ERR_PTR(-EPERM);
1059
1060         page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
1061         if (!page)
1062                 return ERR_PTR(-ENOMEM);
1063
1064         if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
1065                 goto fail;
1066
1067 #ifdef CONFIG_F2FS_CHECK_FS
1068         get_node_info(sbi, dn->nid, &new_ni);
1069         f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
1070 #endif
1071         new_ni.nid = dn->nid;
1072         new_ni.ino = dn->inode->i_ino;
1073         new_ni.blk_addr = NULL_ADDR;
1074         new_ni.flag = 0;
1075         new_ni.version = 0;
1076         set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1077
1078         f2fs_wait_on_page_writeback(page, NODE, true);
1079         fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
1080         set_cold_node(dn->inode, page);
1081         if (!PageUptodate(page))
1082                 SetPageUptodate(page);
1083         if (set_page_dirty(page))
1084                 dn->node_changed = true;
1085
1086         if (f2fs_has_xattr_block(ofs))
1087                 f2fs_i_xnid_write(dn->inode, dn->nid);
1088
1089         if (ofs == 0)
1090                 inc_valid_inode_count(sbi);
1091         return page;
1092
1093 fail:
1094         clear_node_page_dirty(page);
1095         f2fs_put_page(page, 1);
1096         return ERR_PTR(err);
1097 }
1098
1099 /*
1100  * Caller should act on the return value as follows:
1101  * 0: f2fs_put_page(page, 0)
1102  * LOCKED_PAGE or error: f2fs_put_page(page, 1)
1103  */
1104 static int read_node_page(struct page *page, int op_flags)
1105 {
1106         struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1107         struct node_info ni;
1108         struct f2fs_io_info fio = {
1109                 .sbi = sbi,
1110                 .type = NODE,
1111                 .op = REQ_OP_READ,
1112                 .op_flags = op_flags,
1113                 .page = page,
1114                 .encrypted_page = NULL,
1115         };
1116
1117         if (PageUptodate(page))
1118                 return LOCKED_PAGE;
1119
1120         get_node_info(sbi, page->index, &ni);
1121
1122         if (unlikely(ni.blk_addr == NULL_ADDR)) {
1123                 ClearPageUptodate(page);
1124                 return -ENOENT;
1125         }
1126
1127         fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
1128         return f2fs_submit_page_bio(&fio);
1129 }
1130
1131 /*
1132  * Readahead a node page
1133  */
1134 void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
1135 {
1136         struct page *apage;
1137         int err;
1138
1139         if (!nid)
1140                 return;
1141         if (check_nid_range(sbi, nid))
1142                 return;
1143
1144         rcu_read_lock();
1145         apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
1146         rcu_read_unlock();
1147         if (apage)
1148                 return;
1149
1150         apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
1151         if (!apage)
1152                 return;
1153
1154         err = read_node_page(apage, REQ_RAHEAD);
1155         f2fs_put_page(apage, err ? 1 : 0);
1156 }
1157
1158 static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
1159                                         struct page *parent, int start)
1160 {
1161         struct page *page;
1162         int err;
1163
1164         if (!nid)
1165                 return ERR_PTR(-ENOENT);
1166         if (check_nid_range(sbi, nid))
1167                 return ERR_PTR(-EINVAL);
1168 repeat:
1169         page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
1170         if (!page)
1171                 return ERR_PTR(-ENOMEM);
1172
1173         err = read_node_page(page, 0);
1174         if (err < 0) {
1175                 f2fs_put_page(page, 1);
1176                 return ERR_PTR(err);
1177         } else if (err == LOCKED_PAGE) {
1178                 err = 0;
1179                 goto page_hit;
1180         }
1181
1182         if (parent)
1183                 ra_node_pages(parent, start + 1, MAX_RA_NODE);
1184
1185         lock_page(page);
1186
1187         if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1188                 f2fs_put_page(page, 1);
1189                 goto repeat;
1190         }
1191
1192         if (unlikely(!PageUptodate(page))) {
1193                 err = -EIO;
1194                 goto out_err;
1195         }
1196
1197         if (!f2fs_inode_chksum_verify(sbi, page)) {
1198                 err = -EFSBADCRC;
1199                 goto out_err;
1200         }
1201 page_hit:
1202         if (unlikely(nid != nid_of_node(page))) {
1203                 f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
1204                         "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
1205                         nid, nid_of_node(page), ino_of_node(page),
1206                         ofs_of_node(page), cpver_of_node(page),
1207                         next_blkaddr_of_node(page));
1208                 err = -EINVAL;
1209 out_err:
1210                 ClearPageUptodate(page);
1211                 f2fs_put_page(page, 1);
1212                 return ERR_PTR(err);
1213         }
1214         return page;
1215 }
1216
1217 struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
1218 {
1219         return __get_node_page(sbi, nid, NULL, 0);
1220 }
1221
1222 struct page *get_node_page_ra(struct page *parent, int start)
1223 {
1224         struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
1225         nid_t nid = get_nid(parent, start, false);
1226
1227         return __get_node_page(sbi, nid, parent, start);
1228 }
1229
1230 static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
1231 {
1232         struct inode *inode;
1233         struct page *page;
1234         int ret;
1235
1236         /* should flush inline_data before evict_inode */
1237         inode = ilookup(sbi->sb, ino);
1238         if (!inode)
1239                 return;
1240
1241         page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0);
1242         if (!page)
1243                 goto iput_out;
1244
1245         if (!PageUptodate(page))
1246                 goto page_out;
1247
1248         if (!PageDirty(page))
1249                 goto page_out;
1250
1251         if (!clear_page_dirty_for_io(page))
1252                 goto page_out;
1253
1254         ret = f2fs_write_inline_data(inode, page);
1255         inode_dec_dirty_pages(inode);
1256         remove_dirty_inode(inode);
1257         if (ret)
1258                 set_page_dirty(page);
1259 page_out:
1260         f2fs_put_page(page, 1);
1261 iput_out:
1262         iput(inode);
1263 }
1264
1265 void move_node_page(struct page *node_page, int gc_type)
1266 {
1267         if (gc_type == FG_GC) {
1268                 struct f2fs_sb_info *sbi = F2FS_P_SB(node_page);
1269                 struct writeback_control wbc = {
1270                         .sync_mode = WB_SYNC_ALL,
1271                         .nr_to_write = 1,
1272                         .for_reclaim = 0,
1273                 };
1274
1275                 set_page_dirty(node_page);
1276                 f2fs_wait_on_page_writeback(node_page, NODE, true);
1277
1278                 f2fs_bug_on(sbi, PageWriteback(node_page));
1279                 if (!clear_page_dirty_for_io(node_page))
1280                         goto out_page;
1281
1282                 if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc))
1283                         unlock_page(node_page);
1284                 goto release_page;
1285         } else {
1286                 /* set page dirty and write it */
1287                 if (!PageWriteback(node_page))
1288                         set_page_dirty(node_page);
1289         }
1290 out_page:
1291         unlock_page(node_page);
1292 release_page:
1293         f2fs_put_page(node_page, 0);
1294 }
1295
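/*
 * Find the last dirty direct node page belonging to @ino, returning it with
 * an extra page reference held (NULL if none, ERR_PTR on checkpoint error).
 */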
1296 static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
1297 {
1298         pgoff_t index, end;
1299         struct pagevec pvec;
1300         struct page *last_page = NULL;
1301
1302         pagevec_init(&pvec, 0);
1303         index = 0;
1304         end = ULONG_MAX;
1305
1306         while (index <= end) {
1307                 int i, nr_pages;
1308                 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1309                                 PAGECACHE_TAG_DIRTY,
1310                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1311                 if (nr_pages == 0)
1312                         break;
1313
1314                 for (i = 0; i < nr_pages; i++) {
1315                         struct page *page = pvec.pages[i];
1316
1317                         if (unlikely(f2fs_cp_error(sbi))) {
1318                                 f2fs_put_page(last_page, 0);
1319                                 pagevec_release(&pvec);
1320                                 return ERR_PTR(-EIO);
1321                         }
1322
1323                         if (!IS_DNODE(page) || !is_cold_node(page))
1324                                 continue;
1325                         if (ino_of_node(page) != ino)
1326                                 continue;
1327
1328                         lock_page(page);
1329
1330                         if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1331 continue_unlock:
1332                                 unlock_page(page);
1333                                 continue;
1334                         }
1335                         if (ino_of_node(page) != ino)
1336                                 goto continue_unlock;
1337
1338                         if (!PageDirty(page)) {
1339                                 /* someone wrote it for us */
1340                                 goto continue_unlock;
1341                         }
1342
1343                         if (last_page)
1344                                 f2fs_put_page(last_page, 0);
1345
1346                         get_page(page);
1347                         last_page = page;
1348                         unlock_page(page);
1349                 }
1350                 pagevec_release(&pvec);
1351                 cond_resched();
1352         }
1353         return last_page;
1354 }
1355
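/*
 * Write one dirty node page to a new block address and update the nat cache;
 * returns 0 on success, or AOP_WRITEPAGE_ACTIVATE after redirtying the page
 * when the write must be retried later.
 */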
1356 static int __write_node_page(struct page *page, bool atomic, bool *submitted,
1357                                 struct writeback_control *wbc, bool do_balance,
1358                                 enum iostat_type io_type)
1359 {
1360         struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1361         nid_t nid;
1362         struct node_info ni;
1363         struct f2fs_io_info fio = {
1364                 .sbi = sbi,
1365                 .type = NODE,
1366                 .op = REQ_OP_WRITE,
1367                 .op_flags = wbc_to_write_flags(wbc),
1368                 .page = page,
1369                 .encrypted_page = NULL,
1370                 .submitted = false,
1371                 .io_type = io_type,
1372         };
1373
1374         trace_f2fs_writepage(page, NODE);
1375
1376         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1377                 goto redirty_out;
1378         if (unlikely(f2fs_cp_error(sbi)))
1379                 goto redirty_out;
1380
1381         /* get old block addr of this node page */
1382         nid = nid_of_node(page);
1383         f2fs_bug_on(sbi, page->index != nid);
1384
1385         if (wbc->for_reclaim) {
1386                 if (!down_read_trylock(&sbi->node_write))
1387                         goto redirty_out;
1388         } else {
1389                 down_read(&sbi->node_write);
1390         }
1391
1392         get_node_info(sbi, nid, &ni);
1393
1394         /* This page is already truncated */
1395         if (unlikely(ni.blk_addr == NULL_ADDR)) {
1396                 ClearPageUptodate(page);
1397                 dec_page_count(sbi, F2FS_DIRTY_NODES);
1398                 up_read(&sbi->node_write);
1399                 unlock_page(page);
1400                 return 0;
1401         }
1402
1403         if (__is_valid_data_blkaddr(ni.blk_addr) &&
1404                 !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
1405                 up_read(&sbi->node_write);
1406                 goto redirty_out;
1407         }
1408
1409         if (atomic && !test_opt(sbi, NOBARRIER))
1410                 fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
1411
1412         set_page_writeback(page);
1413         fio.old_blkaddr = ni.blk_addr;
1414         write_node_page(nid, &fio);
1415         set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
1416         dec_page_count(sbi, F2FS_DIRTY_NODES);
1417         up_read(&sbi->node_write);
1418
1419         if (wbc->for_reclaim) {
1420                 f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
1421                                                 page->index, NODE);
1422                 submitted = NULL;
1423         }
1424
1425         unlock_page(page);
1426
1427         if (unlikely(f2fs_cp_error(sbi))) {
1428                 f2fs_submit_merged_write(sbi, NODE);
1429                 submitted = NULL;
1430         }
1431         if (submitted)
1432                 *submitted = fio.submitted;
1433
1434         if (do_balance)
1435                 f2fs_balance_fs(sbi, false);
1436         return 0;
1437
1438 redirty_out:
1439         redirty_page_for_writepage(wbc, page);
1440         return AOP_WRITEPAGE_ACTIVATE;
1441 }
1442
1443 static int f2fs_write_node_page(struct page *page,
1444                                 struct writeback_control *wbc)
1445 {
1446         return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
1447 }
1448
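/*
 * Write back the dirty dnode pages of @inode for fsync; in atomic mode the
 * last dnode page carries the fsync mark, and the scan is retried until that
 * marked page has actually been written.
 */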
1449 int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
1450                         struct writeback_control *wbc, bool atomic)
1451 {
1452         pgoff_t index, end;
1453         pgoff_t last_idx = ULONG_MAX;
1454         struct pagevec pvec;
1455         int ret = 0;
1456         struct page *last_page = NULL;
1457         bool marked = false;
1458         nid_t ino = inode->i_ino;
1459
1460         if (atomic) {
1461                 last_page = last_fsync_dnode(sbi, ino);
1462                 if (IS_ERR_OR_NULL(last_page))
1463                         return PTR_ERR_OR_ZERO(last_page);
1464         }
1465 retry:
1466         pagevec_init(&pvec, 0);
1467         index = 0;
1468         end = ULONG_MAX;
1469
1470         while (index <= end) {
1471                 int i, nr_pages;
1472                 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1473                                 PAGECACHE_TAG_DIRTY,
1474                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1475                 if (nr_pages == 0)
1476                         break;
1477
1478                 for (i = 0; i < nr_pages; i++) {
1479                         struct page *page = pvec.pages[i];
1480                         bool submitted = false;
1481
1482                         if (unlikely(f2fs_cp_error(sbi))) {
1483                                 f2fs_put_page(last_page, 0);
1484                                 pagevec_release(&pvec);
1485                                 ret = -EIO;
1486                                 goto out;
1487                         }
1488
1489                         if (!IS_DNODE(page) || !is_cold_node(page))
1490                                 continue;
1491                         if (ino_of_node(page) != ino)
1492                                 continue;
1493
1494                         lock_page(page);
1495
1496                         if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1497 continue_unlock:
1498                                 unlock_page(page);
1499                                 continue;
1500                         }
1501                         if (ino_of_node(page) != ino)
1502                                 goto continue_unlock;
1503
1504                         if (!PageDirty(page) && page != last_page) {
1505                                 /* someone wrote it for us */
1506                                 goto continue_unlock;
1507                         }
1508
1509                         f2fs_wait_on_page_writeback(page, NODE, true);
1510                         BUG_ON(PageWriteback(page));
1511
1512                         set_fsync_mark(page, 0);
1513                         set_dentry_mark(page, 0);
1514
1515                         if (!atomic || page == last_page) {
1516                                 set_fsync_mark(page, 1);
1517                                 if (IS_INODE(page)) {
1518                                         if (is_inode_flag_set(inode,
1519                                                                 FI_DIRTY_INODE))
1520                                                 update_inode(inode, page);
1521                                         set_dentry_mark(page,
1522                                                 need_dentry_mark(sbi, ino));
1523                                 }
1524                                 /* may be written by another thread */
1525                                 if (!PageDirty(page))
1526                                         set_page_dirty(page);
1527                         }
1528
1529                         if (!clear_page_dirty_for_io(page))
1530                                 goto continue_unlock;
1531
1532                         ret = __write_node_page(page, atomic &&
1533                                                 page == last_page,
1534                                                 &submitted, wbc, true,
1535                                                 FS_NODE_IO);
1536                         if (ret) {
1537                                 unlock_page(page);
1538                                 f2fs_put_page(last_page, 0);
1539                                 break;
1540                         } else if (submitted) {
1541                                 last_idx = page->index;
1542                         }
1543
1544                         if (page == last_page) {
1545                                 f2fs_put_page(page, 0);
1546                                 marked = true;
1547                                 break;
1548                         }
1549                 }
1550                 pagevec_release(&pvec);
1551                 cond_resched();
1552
1553                 if (ret || marked)
1554                         break;
1555         }
1556         if (!ret && atomic && !marked) {
1557                 f2fs_msg(sbi->sb, KERN_DEBUG,
1558                         "Retry to write fsync mark: ino=%u, idx=%lx",
1559                                         ino, last_page->index);
1560                 lock_page(last_page);
1561                 f2fs_wait_on_page_writeback(last_page, NODE, true);
1562                 set_page_dirty(last_page);
1563                 unlock_page(last_page);
1564                 goto retry;
1565         }
1566 out:
1567         if (last_idx != ULONG_MAX)
1568                 f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
1569         return ret ? -EIO: 0;
1570 }
1571
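/*
 * Write back dirty node pages in three passes over the node mapping:
 * indirect nodes first, then dentry dnodes, then file dnodes (see the
 * step comment below).  Any merged node bios are submitted at the end.
 */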
1572 int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
1573                                 bool do_balance, enum iostat_type io_type)
1574 {
1575         pgoff_t index, end;
1576         struct pagevec pvec;
1577         int step = 0;
1578         int nwritten = 0;
1579         int ret = 0;
1580
1581         pagevec_init(&pvec, 0);
1582
1583 next_step:
1584         index = 0;
1585         end = ULONG_MAX;
1586
1587         while (index <= end) {
1588                 int i, nr_pages;
1589                 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1590                                 PAGECACHE_TAG_DIRTY,
1591                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1592                 if (nr_pages == 0)
1593                         break;
1594
1595                 for (i = 0; i < nr_pages; i++) {
1596                         struct page *page = pvec.pages[i];
1597                         bool submitted = false;
1598
1599                         if (unlikely(f2fs_cp_error(sbi))) {
1600                                 pagevec_release(&pvec);
1601                                 ret = -EIO;
1602                                 goto out;
1603                         }
1604
1605                         /*
1606                          * flushing sequence with step:
1607                          * 0. indirect nodes
1608                          * 1. dentry dnodes
1609                          * 2. file dnodes
1610                          */
1611                         if (step == 0 && IS_DNODE(page))
1612                                 continue;
1613                         if (step == 1 && (!IS_DNODE(page) ||
1614                                                 is_cold_node(page)))
1615                                 continue;
1616                         if (step == 2 && (!IS_DNODE(page) ||
1617                                                 !is_cold_node(page)))
1618                                 continue;
1619 lock_node:
1620                         if (wbc->sync_mode == WB_SYNC_ALL)
1621                                 lock_page(page);
1622                         else if (!trylock_page(page))
1623                                 continue;
1624
1625                         if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1626 continue_unlock:
1627                                 unlock_page(page);
1628                                 continue;
1629                         }
1630
1631                         if (!PageDirty(page)) {
1632                                 /* someone wrote it for us */
1633                                 goto continue_unlock;
1634                         }
1635
1636                         /* flush inline_data */
1637                         if (is_inline_node(page)) {
1638                                 clear_inline_node(page);
1639                                 unlock_page(page);
1640                                 flush_inline_data(sbi, ino_of_node(page));
1641                                 goto lock_node;
1642                         }
1643
1644                         f2fs_wait_on_page_writeback(page, NODE, true);
1645
1646                         BUG_ON(PageWriteback(page));
1647                         if (!clear_page_dirty_for_io(page))
1648                                 goto continue_unlock;
1649
1650                         set_fsync_mark(page, 0);
1651                         set_dentry_mark(page, 0);
1652
1653                         ret = __write_node_page(page, false, &submitted,
1654                                                 wbc, do_balance, io_type);
1655                         if (ret)
1656                                 unlock_page(page);
1657                         else if (submitted)
1658                                 nwritten++;
1659
1660                         if (--wbc->nr_to_write == 0)
1661                                 break;
1662                 }
1663                 pagevec_release(&pvec);
1664                 cond_resched();
1665
1666                 if (wbc->nr_to_write == 0) {
1667                         step = 2;
1668                         break;
1669                 }
1670         }
1671
1672         if (step < 2) {
1673                 step++;
1674                 goto next_step;
1675         }
1676 out:
1677         if (nwritten)
1678                 f2fs_submit_merged_write(sbi, NODE);
1679         return ret;
1680 }
1681
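/*
 * Wait until writeback completes for every node page that belongs to
 * @ino; return -EIO if any of those pages, or the node mapping itself,
 * recorded a write error.
 */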
1682 int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1683 {
1684         pgoff_t index = 0, end = ULONG_MAX;
1685         struct pagevec pvec;
1686         int ret2, ret = 0;
1687
1688         pagevec_init(&pvec, 0);
1689
1690         while (index <= end) {
1691                 int i, nr_pages;
1692                 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1693                                 PAGECACHE_TAG_WRITEBACK,
1694                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1695                 if (nr_pages == 0)
1696                         break;
1697
1698                 for (i = 0; i < nr_pages; i++) {
1699                         struct page *page = pvec.pages[i];
1700
1701                         /* until radix tree lookup accepts end_index */
1702                         if (unlikely(page->index > end))
1703                                 continue;
1704
1705                         if (ino && ino_of_node(page) == ino) {
1706                                 f2fs_wait_on_page_writeback(page, NODE, true);
1707                                 if (TestClearPageError(page))
1708                                         ret = -EIO;
1709                         }
1710                 }
1711                 pagevec_release(&pvec);
1712                 cond_resched();
1713         }
1714
1715         ret2 = filemap_check_errors(NODE_MAPPING(sbi));
1716         if (!ret)
1717                 ret = ret2;
1718         return ret;
1719 }
1720
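/*
 * ->writepages() for the node address space.  Writeback is skipped while
 * power-off recovery is in progress or while too few node pages are
 * dirty; otherwise a batch of node pages is flushed as WB_SYNC_NONE.
 */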
1721 static int f2fs_write_node_pages(struct address_space *mapping,
1722                             struct writeback_control *wbc)
1723 {
1724         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
1725         struct blk_plug plug;
1726         long diff;
1727
1728         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1729                 goto skip_write;
1730
1731         /* balancing f2fs's metadata in background */
1732         f2fs_balance_fs_bg(sbi);
1733
1734         /* collect a number of dirty node pages and write them together */
1735         if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1736                 goto skip_write;
1737
1738         trace_f2fs_writepages(mapping->host, wbc, NODE);
1739
1740         diff = nr_pages_to_write(sbi, NODE, wbc);
1741         wbc->sync_mode = WB_SYNC_NONE;
1742         blk_start_plug(&plug);
1743         sync_node_pages(sbi, wbc, true, FS_NODE_IO);
1744         blk_finish_plug(&plug);
1745         wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1746         return 0;
1747
1748 skip_write:
1749         wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
1750         trace_f2fs_writepages(mapping->host, wbc, NODE);
1751         return 0;
1752 }
1753
1754 static int f2fs_set_node_page_dirty(struct page *page)
1755 {
1756         trace_f2fs_set_page_dirty(page, NODE);
1757
1758         if (!PageUptodate(page))
1759                 SetPageUptodate(page);
1760         if (!PageDirty(page)) {
1761                 f2fs_set_page_dirty_nobuffers(page);
1762                 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
1763                 SetPagePrivate(page);
1764                 f2fs_trace_pid(page);
1765                 return 1;
1766         }
1767         return 0;
1768 }
1769
1770 /*
1771  * Structure of the f2fs node operations
1772  */
1773 const struct address_space_operations f2fs_node_aops = {
1774         .writepage      = f2fs_write_node_page,
1775         .writepages     = f2fs_write_node_pages,
1776         .set_page_dirty = f2fs_set_node_page_dirty,
1777         .invalidatepage = f2fs_invalidate_page,
1778         .releasepage    = f2fs_release_page,
1779 #ifdef CONFIG_MIGRATION
1780         .migratepage    = f2fs_migrate_page,
1781 #endif
1782 };
1783
1784 static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1785                                                 nid_t n)
1786 {
1787         return radix_tree_lookup(&nm_i->free_nid_root, n);
1788 }
1789
1790 static int __insert_nid_to_list(struct f2fs_sb_info *sbi,
1791                         struct free_nid *i, enum nid_list list, bool new)
1792 {
1793         struct f2fs_nm_info *nm_i = NM_I(sbi);
1794
1795         if (new) {
1796                 int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
1797                 if (err)
1798                         return err;
1799         }
1800
1801         f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
1802                                                 i->state != NID_ALLOC);
1803         nm_i->nid_cnt[list]++;
1804         list_add_tail(&i->list, &nm_i->nid_list[list]);
1805         return 0;
1806 }
1807
1808 static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
1809                         struct free_nid *i, enum nid_list list, bool reuse)
1810 {
1811         struct f2fs_nm_info *nm_i = NM_I(sbi);
1812
1813         f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
1814                                                 i->state != NID_ALLOC);
1815         nm_i->nid_cnt[list]--;
1816         list_del(&i->list);
1817         if (!reuse)
1818                 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1819 }
1820
1821 /* return whether the nid is recognized as free */
1822 static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1823 {
1824         struct f2fs_nm_info *nm_i = NM_I(sbi);
1825         struct free_nid *i, *e;
1826         struct nat_entry *ne;
1827         int err = -EINVAL;
1828         bool ret = false;
1829
1830         /* 0 nid should not be used */
1831         if (unlikely(nid == 0))
1832                 return false;
1833
1834         i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1835         i->nid = nid;
1836         i->state = NID_NEW;
1837
1838         if (radix_tree_preload(GFP_NOFS))
1839                 goto err;
1840
1841         spin_lock(&nm_i->nid_list_lock);
1842
1843         if (build) {
1844                 /*
1845                  *   Thread A             Thread B
1846                  *  - f2fs_create
1847                  *   - f2fs_new_inode
1848                  *    - alloc_nid
1849                  *     - __insert_nid_to_list(ALLOC_NID_LIST)
1850                  *                     - f2fs_balance_fs_bg
1851                  *                      - build_free_nids
1852                  *                       - __build_free_nids
1853                  *                        - scan_nat_page
1854                  *                         - add_free_nid
1855                  *                          - __lookup_nat_cache
1856                  *  - f2fs_add_link
1857                  *   - init_inode_metadata
1858                  *    - new_inode_page
1859                  *     - new_node_page
1860                  *      - set_node_addr
1861                  *  - alloc_nid_done
1862                  *   - __remove_nid_from_list(ALLOC_NID_LIST)
1863                  *                         - __insert_nid_to_list(FREE_NID_LIST)
1864                  */
1865                 ne = __lookup_nat_cache(nm_i, nid);
1866                 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1867                                 nat_get_blkaddr(ne) != NULL_ADDR))
1868                         goto err_out;
1869
1870                 e = __lookup_free_nid_list(nm_i, nid);
1871                 if (e) {
1872                         if (e->state == NID_NEW)
1873                                 ret = true;
1874                         goto err_out;
1875                 }
1876         }
1877         ret = true;
1878         err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
1879 err_out:
1880         spin_unlock(&nm_i->nid_list_lock);
1881         radix_tree_preload_end();
1882 err:
1883         if (err)
1884                 kmem_cache_free(free_nid_slab, i);
1885         return ret;
1886 }
1887
1888 static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
1889 {
1890         struct f2fs_nm_info *nm_i = NM_I(sbi);
1891         struct free_nid *i;
1892         bool need_free = false;
1893
1894         spin_lock(&nm_i->nid_list_lock);
1895         i = __lookup_free_nid_list(nm_i, nid);
1896         if (i && i->state == NID_NEW) {
1897                 __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
1898                 need_free = true;
1899         }
1900         spin_unlock(&nm_i->nid_list_lock);
1901
1902         if (need_free)
1903                 kmem_cache_free(free_nid_slab, i);
1904 }
1905
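/*
 * Set or clear the bit for @nid in the per-NAT-block free nid bitmap and
 * adjust the matching free_nid_count.  Callers hold nm_i->nid_list_lock.
 */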
1906 static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
1907                                                         bool set, bool build)
1908 {
1909         struct f2fs_nm_info *nm_i = NM_I(sbi);
1910         unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
1911         unsigned int nid_ofs = nid - START_NID(nid);
1912
1913         if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
1914                 return;
1915
1916         if (set)
1917                 __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1918         else
1919                 __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1920
1921         if (set)
1922                 nm_i->free_nid_count[nat_ofs]++;
1923         else if (!build)
1924                 nm_i->free_nid_count[nat_ofs]--;
1925 }
1926
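/*
 * Scan one NAT block page: every nid whose block address is NULL_ADDR is
 * offered to the free nid list, and the free nid bitmap is updated to
 * match.
 */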
1927 static void scan_nat_page(struct f2fs_sb_info *sbi,
1928                         struct page *nat_page, nid_t start_nid)
1929 {
1930         struct f2fs_nm_info *nm_i = NM_I(sbi);
1931         struct f2fs_nat_block *nat_blk = page_address(nat_page);
1932         block_t blk_addr;
1933         unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
1934         int i;
1935
1936         if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
1937                 return;
1938
1939         __set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
1940
1941         i = start_nid % NAT_ENTRY_PER_BLOCK;
1942
1943         for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1944                 bool freed = false;
1945
1946                 if (unlikely(start_nid >= nm_i->max_nid))
1947                         break;
1948
1949                 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1950                 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
1951                 if (blk_addr == NULL_ADDR)
1952                         freed = add_free_nid(sbi, start_nid, true);
1953                 spin_lock(&NM_I(sbi)->nid_list_lock);
1954                 update_free_nid_bitmap(sbi, start_nid, freed, true);
1955                 spin_unlock(&NM_I(sbi)->nid_list_lock);
1956         }
1957 }
1958
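/*
 * Collect free nids from the cached free nid bitmaps and from NAT
 * entries still sitting in the hot data journal, without reading any
 * on-disk NAT page.
 */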
1959 static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
1960 {
1961         struct f2fs_nm_info *nm_i = NM_I(sbi);
1962         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1963         struct f2fs_journal *journal = curseg->journal;
1964         unsigned int i, idx;
1965
1966         down_read(&nm_i->nat_tree_lock);
1967
1968         for (i = 0; i < nm_i->nat_blocks; i++) {
1969                 if (!test_bit_le(i, nm_i->nat_block_bitmap))
1970                         continue;
1971                 if (!nm_i->free_nid_count[i])
1972                         continue;
1973                 for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
1974                         nid_t nid;
1975
1976                         if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
1977                                 continue;
1978
1979                         nid = i * NAT_ENTRY_PER_BLOCK + idx;
1980                         add_free_nid(sbi, nid, true);
1981
1982                         if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
1983                                 goto out;
1984                 }
1985         }
1986 out:
1987         down_read(&curseg->journal_rwsem);
1988         for (i = 0; i < nats_in_cursum(journal); i++) {
1989                 block_t addr;
1990                 nid_t nid;
1991
1992                 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
1993                 nid = le32_to_cpu(nid_in_journal(journal, i));
1994                 if (addr == NULL_ADDR)
1995                         add_free_nid(sbi, nid, true);
1996                 else
1997                         remove_free_nid(sbi, nid);
1998         }
1999         up_read(&curseg->journal_rwsem);
2000         up_read(&nm_i->nat_tree_lock);
2001 }
2002
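/*
 * Refill the free nid list.  Outside of mount we first try the in-memory
 * free nid bitmaps; failing that, up to FREE_NID_PAGES NAT pages are
 * scanned starting at next_scan_nid, and the NAT journal is consulted
 * last.
 */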
2003 static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
2004 {
2005         struct f2fs_nm_info *nm_i = NM_I(sbi);
2006         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2007         struct f2fs_journal *journal = curseg->journal;
2008         int i = 0;
2009         nid_t nid = nm_i->next_scan_nid;
2010
2011         if (unlikely(nid >= nm_i->max_nid))
2012                 nid = 0;
2013
2014         if (unlikely(nid % NAT_ENTRY_PER_BLOCK))
2015                 nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK;
2016
2017         /* Enough entries */
2018         if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
2019                 return;
2020
2021         if (!sync && !available_free_memory(sbi, FREE_NIDS))
2022                 return;
2023
2024         if (!mount) {
2025                 /* try to find free nids in free_nid_bitmap */
2026                 scan_free_nid_bits(sbi);
2027
2028                 if (nm_i->nid_cnt[FREE_NID_LIST])
2029                         return;
2030         }
2031
2032         /* readahead nat pages to be scanned */
2033         ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
2034                                                         META_NAT, true);
2035
2036         down_read(&nm_i->nat_tree_lock);
2037
2038         while (1) {
2039                 struct page *page = get_current_nat_page(sbi, nid);
2040
2041                 scan_nat_page(sbi, page, nid);
2042                 f2fs_put_page(page, 1);
2043
2044                 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
2045                 if (unlikely(nid >= nm_i->max_nid))
2046                         nid = 0;
2047
2048                 if (++i >= FREE_NID_PAGES)
2049                         break;
2050         }
2051
2052         /* remember where to resume scanning for more free nids next time */
2053         nm_i->next_scan_nid = nid;
2054
2055         /* find free nids from current sum_pages */
2056         down_read(&curseg->journal_rwsem);
2057         for (i = 0; i < nats_in_cursum(journal); i++) {
2058                 block_t addr;
2059
2060                 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
2061                 nid = le32_to_cpu(nid_in_journal(journal, i));
2062                 if (addr == NULL_ADDR)
2063                         add_free_nid(sbi, nid, true);
2064                 else
2065                         remove_free_nid(sbi, nid);
2066         }
2067         up_read(&curseg->journal_rwsem);
2068         up_read(&nm_i->nat_tree_lock);
2069
2070         ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
2071                                         nm_i->ra_nid_pages, META_NAT, false);
2072 }
2073
2074 void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
2075 {
2076         mutex_lock(&NM_I(sbi)->build_lock);
2077         __build_free_nids(sbi, sync, mount);
2078         mutex_unlock(&NM_I(sbi)->build_lock);
2079 }
2080
2081 /*
2082  * If this function returns success, the caller can obtain a new nid
2083  * from the second parameter of this function.
2084  * The returned nid can be used as both an ino and a nid when an inode is created.
2085  */
2086 bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
2087 {
2088         struct f2fs_nm_info *nm_i = NM_I(sbi);
2089         struct free_nid *i = NULL;
2090 retry:
2091 #ifdef CONFIG_F2FS_FAULT_INJECTION
2092         if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
2093                 f2fs_show_injection_info(FAULT_ALLOC_NID);
2094                 return false;
2095         }
2096 #endif
2097         spin_lock(&nm_i->nid_list_lock);
2098
2099         if (unlikely(nm_i->available_nids == 0)) {
2100                 spin_unlock(&nm_i->nid_list_lock);
2101                 return false;
2102         }
2103
2104         /* We should not use stale free nids created by build_free_nids */
2105         if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) {
2106                 f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST]));
2107                 i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
2108                                         struct free_nid, list);
2109                 *nid = i->nid;
2110
2111                 __remove_nid_from_list(sbi, i, FREE_NID_LIST, true);
2112                 i->state = NID_ALLOC;
2113                 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
2114                 nm_i->available_nids--;
2115
2116                 update_free_nid_bitmap(sbi, *nid, false, false);
2117
2118                 spin_unlock(&nm_i->nid_list_lock);
2119                 return true;
2120         }
2121         spin_unlock(&nm_i->nid_list_lock);
2122
2123         /* Let's scan nat pages and their caches to get free nids */
2124         build_free_nids(sbi, true, false);
2125         goto retry;
2126 }
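
/*
 * A rough usage sketch (mirroring recover_xattr_data() below): a nid
 * handed out by alloc_nid() must be resolved with either alloc_nid_done()
 * or alloc_nid_failed(); here "ofs" stands in for the caller's node offset:
 *
 *	if (!alloc_nid(sbi, &nid))
 *		return -ENOSPC;
 *	set_new_dnode(&dn, inode, NULL, NULL, nid);
 *	page = new_node_page(&dn, ofs);
 *	if (IS_ERR(page)) {
 *		alloc_nid_failed(sbi, nid);	- give the nid back
 *		return PTR_ERR(page);
 *	}
 *	alloc_nid_done(sbi, nid);		- the nid is now in use
 */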
2127
2128 /*
2129  * alloc_nid() should be called prior to this function.
2130  */
2131 void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
2132 {
2133         struct f2fs_nm_info *nm_i = NM_I(sbi);
2134         struct free_nid *i;
2135
2136         spin_lock(&nm_i->nid_list_lock);
2137         i = __lookup_free_nid_list(nm_i, nid);
2138         f2fs_bug_on(sbi, !i);
2139         __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
2140         spin_unlock(&nm_i->nid_list_lock);
2141
2142         kmem_cache_free(free_nid_slab, i);
2143 }
2144
2145 /*
2146  * alloc_nid() should be called prior to this function.
2147  */
2148 void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
2149 {
2150         struct f2fs_nm_info *nm_i = NM_I(sbi);
2151         struct free_nid *i;
2152         bool need_free = false;
2153
2154         if (!nid)
2155                 return;
2156
2157         spin_lock(&nm_i->nid_list_lock);
2158         i = __lookup_free_nid_list(nm_i, nid);
2159         f2fs_bug_on(sbi, !i);
2160
2161         if (!available_free_memory(sbi, FREE_NIDS)) {
2162                 __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
2163                 need_free = true;
2164         } else {
2165                 __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true);
2166                 i->state = NID_NEW;
2167                 __insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
2168         }
2169
2170         nm_i->available_nids++;
2171
2172         update_free_nid_bitmap(sbi, nid, true, false);
2173
2174         spin_unlock(&nm_i->nid_list_lock);
2175
2176         if (need_free)
2177                 kmem_cache_free(free_nid_slab, i);
2178 }
2179
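/*
 * Release up to @nr_shrink cached free nids while more than MAX_FREE_NIDS
 * are cached, and return how many entries were actually freed.
 */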
2180 int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
2181 {
2182         struct f2fs_nm_info *nm_i = NM_I(sbi);
2183         struct free_nid *i, *next;
2184         int nr = nr_shrink;
2185
2186         if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
2187                 return 0;
2188
2189         if (!mutex_trylock(&nm_i->build_lock))
2190                 return 0;
2191
2192         spin_lock(&nm_i->nid_list_lock);
2193         list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST],
2194                                                                         list) {
2195                 if (nr_shrink <= 0 ||
2196                                 nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
2197                         break;
2198
2199                 __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
2200                 kmem_cache_free(free_nid_slab, i);
2201                 nr_shrink--;
2202         }
2203         spin_unlock(&nm_i->nid_list_lock);
2204         mutex_unlock(&nm_i->build_lock);
2205
2206         return nr - nr_shrink;
2207 }
2208
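/*
 * Recovery helper: copy the inline xattr area from the logged node page
 * @page into the inode's node page, or clear FI_INLINE_XATTR if the
 * logged inode no longer carries inline xattrs.
 */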
2209 void recover_inline_xattr(struct inode *inode, struct page *page)
2210 {
2211         void *src_addr, *dst_addr;
2212         size_t inline_size;
2213         struct page *ipage;
2214         struct f2fs_inode *ri;
2215
2216         ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
2217         f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
2218
2219         ri = F2FS_INODE(page);
2220         if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
2221                 clear_inode_flag(inode, FI_INLINE_XATTR);
2222                 goto update_inode;
2223         }
2224
2225         dst_addr = inline_xattr_addr(ipage);
2226         src_addr = inline_xattr_addr(page);
2227         inline_size = inline_xattr_size(inode);
2228
2229         f2fs_wait_on_page_writeback(ipage, NODE, true);
2230         memcpy(dst_addr, src_addr, inline_size);
2231 update_inode:
2232         update_inode(inode, ipage);
2233         f2fs_put_page(ipage, 1);
2234 }
2235
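/*
 * Recovery helper: invalidate the inode's stale xattr node (if any),
 * allocate a fresh nid for a new xattr node page, and copy the xattr
 * block over from the logged node page.
 */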
2236 int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
2237 {
2238         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2239         nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
2240         nid_t new_xnid;
2241         struct dnode_of_data dn;
2242         struct node_info ni;
2243         struct page *xpage;
2244
2245         if (!prev_xnid)
2246                 goto recover_xnid;
2247
2248         /* 1: invalidate the previous xattr nid */
2249         get_node_info(sbi, prev_xnid, &ni);
2250         f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
2251         invalidate_blocks(sbi, ni.blk_addr);
2252         dec_valid_node_count(sbi, inode, false);
2253         set_node_addr(sbi, &ni, NULL_ADDR, false);
2254
2255 recover_xnid:
2256         /* 2: update xattr nid in inode */
2257         if (!alloc_nid(sbi, &new_xnid))
2258                 return -ENOSPC;
2259
2260         set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
2261         xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
2262         if (IS_ERR(xpage)) {
2263                 alloc_nid_failed(sbi, new_xnid);
2264                 return PTR_ERR(xpage);
2265         }
2266
2267         alloc_nid_done(sbi, new_xnid);
2268         update_inode_page(inode);
2269
2270         /* 3: update and set xattr node page dirty */
2271         memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);
2272
2273         set_page_dirty(xpage);
2274         f2fs_put_page(xpage, 1);
2275
2276         return 0;
2277 }
2278
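/*
 * Recovery helper: rebuild an inode page for the inode logged in @page,
 * copying its static fields and resetting size, blocks, links and the
 * xattr nid so recovery can repopulate them.
 */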
2279 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
2280 {
2281         struct f2fs_inode *src, *dst;
2282         nid_t ino = ino_of_node(page);
2283         struct node_info old_ni, new_ni;
2284         struct page *ipage;
2285
2286         get_node_info(sbi, ino, &old_ni);
2287
2288         if (unlikely(old_ni.blk_addr != NULL_ADDR))
2289                 return -EINVAL;
2290 retry:
2291         ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
2292         if (!ipage) {
2293                 congestion_wait(BLK_RW_ASYNC, HZ/50);
2294                 goto retry;
2295         }
2296
2297         /* Should not use this inode from free nid list */
2298         remove_free_nid(sbi, ino);
2299
2300         if (!PageUptodate(ipage))
2301                 SetPageUptodate(ipage);
2302         fill_node_footer(ipage, ino, ino, 0, true);
2303
2304         src = F2FS_INODE(page);
2305         dst = F2FS_INODE(ipage);
2306
2307         memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
2308         dst->i_size = 0;
2309         dst->i_blocks = cpu_to_le64(1);
2310         dst->i_links = cpu_to_le32(1);
2311         dst->i_xattr_nid = 0;
2312         dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
2313         if (dst->i_inline & F2FS_EXTRA_ATTR) {
2314                 dst->i_extra_isize = src->i_extra_isize;
2315                 if (f2fs_sb_has_project_quota(sbi->sb) &&
2316                         F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
2317                                                                 i_projid))
2318                         dst->i_projid = src->i_projid;
2319         }
2320
2321         new_ni = old_ni;
2322         new_ni.ino = ino;
2323
2324         if (unlikely(inc_valid_node_count(sbi, NULL, true)))
2325                 WARN_ON(1);
2326         set_node_addr(sbi, &new_ni, NEW_ADDR, false);
2327         inc_valid_inode_count(sbi);
2328         set_page_dirty(ipage);
2329         f2fs_put_page(ipage, 1);
2330         return 0;
2331 }
2332
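/*
 * Rebuild the node summary block for segment @segno by reading its node
 * blocks back from disk; only the nid of each entry is meaningful,
 * version and ofs_in_node are cleared.
 */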
2333 int restore_node_summary(struct f2fs_sb_info *sbi,
2334                         unsigned int segno, struct f2fs_summary_block *sum)
2335 {
2336         struct f2fs_node *rn;
2337         struct f2fs_summary *sum_entry;
2338         block_t addr;
2339         int i, idx, last_offset, nrpages;
2340
2341         /* scan the node segment */
2342         last_offset = sbi->blocks_per_seg;
2343         addr = START_BLOCK(sbi, segno);
2344         sum_entry = &sum->entries[0];
2345
2346         for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
2347                 nrpages = min(last_offset - i, BIO_MAX_PAGES);
2348
2349                 /* readahead node pages */
2350                 ra_meta_pages(sbi, addr, nrpages, META_POR, true);
2351
2352                 for (idx = addr; idx < addr + nrpages; idx++) {
2353                         struct page *page = get_tmp_page(sbi, idx);
2354
2355                         rn = F2FS_NODE(page);
2356                         sum_entry->nid = rn->footer.nid;
2357                         sum_entry->version = 0;
2358                         sum_entry->ofs_in_node = 0;
2359                         sum_entry++;
2360                         f2fs_put_page(page, 1);
2361                 }
2362
2363                 invalidate_mapping_pages(META_MAPPING(sbi), addr,
2364                                                         addr + nrpages);
2365         }
2366         return 0;
2367 }
2368
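/*
 * Drain every NAT entry out of the hot data journal into the in-memory
 * NAT cache and mark it dirty, so it is written back through NAT pages
 * instead of staying in the journal.
 */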
2369 static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
2370 {
2371         struct f2fs_nm_info *nm_i = NM_I(sbi);
2372         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2373         struct f2fs_journal *journal = curseg->journal;
2374         int i;
2375
2376         down_write(&curseg->journal_rwsem);
2377         for (i = 0; i < nats_in_cursum(journal); i++) {
2378                 struct nat_entry *ne;
2379                 struct f2fs_nat_entry raw_ne;
2380                 nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
2381
2382                 raw_ne = nat_in_journal(journal, i);
2383
2384                 ne = __lookup_nat_cache(nm_i, nid);
2385                 if (!ne) {
2386                         ne = grab_nat_entry(nm_i, nid, true);
2387                         node_info_from_raw_nat(&ne->ni, &raw_ne);
2388                 }
2389
2390                 /*
2391                  * if a free nat in the journal has not been used since the last
2392                  * checkpoint, we should remove it from available nids,
2393                  * since later we will add it again.
2394                  */
2395                 if (!get_nat_flag(ne, IS_DIRTY) &&
2396                                 le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
2397                         spin_lock(&nm_i->nid_list_lock);
2398                         nm_i->available_nids--;
2399                         spin_unlock(&nm_i->nid_list_lock);
2400                 }
2401
2402                 __set_nat_cache_dirty(nm_i, ne);
2403         }
2404         update_nats_in_cursum(journal, -i);
2405         up_write(&curseg->journal_rwsem);
2406 }
2407
2408 static void __adjust_nat_entry_set(struct nat_entry_set *nes,
2409                                                 struct list_head *head, int max)
2410 {
2411         struct nat_entry_set *cur;
2412
2413         if (nes->entry_cnt >= max)
2414                 goto add_out;
2415
2416         list_for_each_entry(cur, head, set_list) {
2417                 if (cur->entry_cnt >= nes->entry_cnt) {
2418                         list_add(&nes->set_list, cur->set_list.prev);
2419                         return;
2420                 }
2421         }
2422 add_out:
2423         list_add_tail(&nes->set_list, head);
2424 }
2425
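/*
 * Refresh the full/empty nat_bits of the NAT block starting at
 * @start_nid, based on how many of its entries carry a valid block
 * address.
 */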
2426 static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
2427                                                 struct page *page)
2428 {
2429         struct f2fs_nm_info *nm_i = NM_I(sbi);
2430         unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
2431         struct f2fs_nat_block *nat_blk = page_address(page);
2432         int valid = 0;
2433         int i;
2434
2435         if (!enabled_nat_bits(sbi, NULL))
2436                 return;
2437
2438         for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
2439                 if (start_nid == 0 && i == 0)
2440                         valid++;
2441                 if (nat_blk->entries[i].block_addr)
2442                         valid++;
2443         }
2444         if (valid == 0) {
2445                 __set_bit_le(nat_index, nm_i->empty_nat_bits);
2446                 __clear_bit_le(nat_index, nm_i->full_nat_bits);
2447                 return;
2448         }
2449
2450         __clear_bit_le(nat_index, nm_i->empty_nat_bits);
2451         if (valid == NAT_ENTRY_PER_BLOCK)
2452                 __set_bit_le(nat_index, nm_i->full_nat_bits);
2453         else
2454                 __clear_bit_le(nat_index, nm_i->full_nat_bits);
2455 }
2456
2457 static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2458                 struct nat_entry_set *set, struct cp_control *cpc)
2459 {
2460         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2461         struct f2fs_journal *journal = curseg->journal;
2462         nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
2463         bool to_journal = true;
2464         struct f2fs_nat_block *nat_blk;
2465         struct nat_entry *ne, *cur;
2466         struct page *page = NULL;
2467
2468         /*
2469          * there are two steps to flush nat entries:
2470          * #1, flush nat entries to journal in current hot data summary block.
2471          * #2, flush nat entries to nat page.
2472          */
2473         if (enabled_nat_bits(sbi, cpc) ||
2474                 !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
2475                 to_journal = false;
2476
2477         if (to_journal) {
2478                 down_write(&curseg->journal_rwsem);
2479         } else {
2480                 page = get_next_nat_page(sbi, start_nid);
2481                 nat_blk = page_address(page);
2482                 f2fs_bug_on(sbi, !nat_blk);
2483         }
2484
2485         /* flush dirty nats in nat entry set */
2486         list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
2487                 struct f2fs_nat_entry *raw_ne;
2488                 nid_t nid = nat_get_nid(ne);
2489                 int offset;
2490
2491                 f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
2492
2493                 if (to_journal) {
2494                         offset = lookup_journal_in_cursum(journal,
2495                                                         NAT_JOURNAL, nid, 1);
2496                         f2fs_bug_on(sbi, offset < 0);
2497                         raw_ne = &nat_in_journal(journal, offset);
2498                         nid_in_journal(journal, offset) = cpu_to_le32(nid);
2499                 } else {
2500                         raw_ne = &nat_blk->entries[nid - start_nid];
2501                 }
2502                 raw_nat_from_node_info(raw_ne, &ne->ni);
2503                 nat_reset_flag(ne);
2504                 __clear_nat_cache_dirty(NM_I(sbi), set, ne);
2505                 if (nat_get_blkaddr(ne) == NULL_ADDR) {
2506                         add_free_nid(sbi, nid, false);
2507                         spin_lock(&NM_I(sbi)->nid_list_lock);
2508                         NM_I(sbi)->available_nids++;
2509                         update_free_nid_bitmap(sbi, nid, true, false);
2510                         spin_unlock(&NM_I(sbi)->nid_list_lock);
2511                 } else {
2512                         spin_lock(&NM_I(sbi)->nid_list_lock);
2513                         update_free_nid_bitmap(sbi, nid, false, false);
2514                         spin_unlock(&NM_I(sbi)->nid_list_lock);
2515                 }
2516         }
2517
2518         if (to_journal) {
2519                 up_write(&curseg->journal_rwsem);
2520         } else {
2521                 __update_nat_bits(sbi, start_nid, page);
2522                 f2fs_put_page(page, 1);
2523         }
2524
2525         /* Allow dirty nats by node block allocation in write_begin */
2526         if (!set->entry_cnt) {
2527                 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
2528                 kmem_cache_free(nat_entry_set_slab, set);
2529         }
2530 }
2531
2532 /*
2533  * This function is called during the checkpointing process.
2534  */
2535 void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2536 {
2537         struct f2fs_nm_info *nm_i = NM_I(sbi);
2538         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2539         struct f2fs_journal *journal = curseg->journal;
2540         struct nat_entry_set *setvec[SETVEC_SIZE];
2541         struct nat_entry_set *set, *tmp;
2542         unsigned int found;
2543         nid_t set_idx = 0;
2544         LIST_HEAD(sets);
2545
2546         if (!nm_i->dirty_nat_cnt)
2547                 return;
2548
2549         down_write(&nm_i->nat_tree_lock);
2550
2551         /*
2552          * if there is not enough space in the journal to store dirty nat
2553          * entries, remove all entries from journal and merge them
2554          * into nat entry set.
2555          */
2556         if (enabled_nat_bits(sbi, cpc) ||
2557                 !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
2558                 remove_nats_in_journal(sbi);
2559
2560         while ((found = __gang_lookup_nat_set(nm_i,
2561                                         set_idx, SETVEC_SIZE, setvec))) {
2562                 unsigned idx;
2563                 set_idx = setvec[found - 1]->set + 1;
2564                 for (idx = 0; idx < found; idx++)
2565                         __adjust_nat_entry_set(setvec[idx], &sets,
2566                                                 MAX_NAT_JENTRIES(journal));
2567         }
2568
2569         /* flush dirty nats in nat entry set */
2570         list_for_each_entry_safe(set, tmp, &sets, set_list)
2571                 __flush_nat_entry_set(sbi, set, cpc);
2572
2573         up_write(&nm_i->nat_tree_lock);
2574         /* Allow dirty nats by node block allocation in write_begin */
2575 }
2576
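/*
 * Load the nat_bits blocks stored at the tail of the checkpoint pack and
 * verify their checkpoint version stamp; on mismatch, nat_bits support is
 * disabled for this mount.
 */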
2577 static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
2578 {
2579         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2580         struct f2fs_nm_info *nm_i = NM_I(sbi);
2581         unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
2582         unsigned int i;
2583         __u64 cp_ver = cur_cp_version(ckpt);
2584         block_t nat_bits_addr;
2585
2586         if (!enabled_nat_bits(sbi, NULL))
2587                 return 0;
2588
2589         nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
2590                                                 F2FS_BLKSIZE - 1);
2591         nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
2592                                                 GFP_KERNEL);
2593         if (!nm_i->nat_bits)
2594                 return -ENOMEM;
2595
2596         nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
2597                                                 nm_i->nat_bits_blocks;
2598         for (i = 0; i < nm_i->nat_bits_blocks; i++) {
2599                 struct page *page = get_meta_page(sbi, nat_bits_addr++);
2600
2601                 memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
2602                                         page_address(page), F2FS_BLKSIZE);
2603                 f2fs_put_page(page, 1);
2604         }
2605
2606         cp_ver |= (cur_cp_crc(ckpt) << 32);
2607         if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
2608                 disable_nat_bits(sbi, true);
2609                 return 0;
2610         }
2611
2612         nm_i->full_nat_bits = nm_i->nat_bits + 8;
2613         nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
2614
2615         f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
2616         return 0;
2617 }
2618
2619 static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
2620 {
2621         struct f2fs_nm_info *nm_i = NM_I(sbi);
2622         unsigned int i = 0;
2623         nid_t nid, last_nid;
2624
2625         if (!enabled_nat_bits(sbi, NULL))
2626                 return;
2627
2628         for (i = 0; i < nm_i->nat_blocks; i++) {
2629                 i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
2630                 if (i >= nm_i->nat_blocks)
2631                         break;
2632
2633                 __set_bit_le(i, nm_i->nat_block_bitmap);
2634
2635                 nid = i * NAT_ENTRY_PER_BLOCK;
2636                 last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
2637
2638                 spin_lock(&NM_I(sbi)->nid_list_lock);
2639                 for (; nid < last_nid; nid++)
2640                         update_free_nid_bitmap(sbi, nid, true, true);
2641                 spin_unlock(&NM_I(sbi)->nid_list_lock);
2642         }
2643
2644         for (i = 0; i < nm_i->nat_blocks; i++) {
2645                 i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
2646                 if (i >= nm_i->nat_blocks)
2647                         break;
2648
2649                 __set_bit_le(i, nm_i->nat_block_bitmap);
2650         }
2651 }
2652
2653 static int init_node_manager(struct f2fs_sb_info *sbi)
2654 {
2655         struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
2656         struct f2fs_nm_info *nm_i = NM_I(sbi);
2657         unsigned char *version_bitmap;
2658         unsigned int nat_segs;
2659         int err;
2660
2661         nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
2662
2663         /* segment_count_nat includes pair segment so divide to 2. */
2664         /* segment_count_nat includes pair segments, so divide by 2. */
2665         nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
2666         nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
2667
2668         /* unused nids: 0, node, meta (and root is counted as a valid node) */
2669         nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
2670                                                         F2FS_RESERVED_NODE_NUM;
2671         nm_i->nid_cnt[FREE_NID_LIST] = 0;
2672         nm_i->nid_cnt[ALLOC_NID_LIST] = 0;
2673         nm_i->nat_cnt = 0;
2674         nm_i->ram_thresh = DEF_RAM_THRESHOLD;
2675         nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
2676         nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
2677
2678         INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
2679         INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]);
2680         INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]);
2681         INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
2682         INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
2683         INIT_LIST_HEAD(&nm_i->nat_entries);
2684
2685         mutex_init(&nm_i->build_lock);
2686         spin_lock_init(&nm_i->nid_list_lock);
2687         init_rwsem(&nm_i->nat_tree_lock);
2688
2689         nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
2690         nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
2691         version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
2692         if (!version_bitmap)
2693                 return -EFAULT;
2694
2695         nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
2696                                         GFP_KERNEL);
2697         if (!nm_i->nat_bitmap)
2698                 return -ENOMEM;
2699
2700         err = __get_nat_bitmaps(sbi);
2701         if (err)
2702                 return err;
2703
2704 #ifdef CONFIG_F2FS_CHECK_FS
2705         nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
2706                                         GFP_KERNEL);
2707         if (!nm_i->nat_bitmap_mir)
2708                 return -ENOMEM;
2709 #endif
2710
2711         return 0;
2712 }
2713
2714 static int init_free_nid_cache(struct f2fs_sb_info *sbi)
2715 {
2716         struct f2fs_nm_info *nm_i = NM_I(sbi);
2717
2718         nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks *
2719                                         NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
2720         if (!nm_i->free_nid_bitmap)
2721                 return -ENOMEM;
2722
2723         nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8,
2724                                                                 GFP_KERNEL);
2725         if (!nm_i->nat_block_bitmap)
2726                 return -ENOMEM;
2727
2728         nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks *
2729                                         sizeof(unsigned short), GFP_KERNEL);
2730         if (!nm_i->free_nid_count)
2731                 return -ENOMEM;
2732         return 0;
2733 }
2734
2735 int build_node_manager(struct f2fs_sb_info *sbi)
2736 {
2737         int err;
2738
2739         sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
2740         if (!sbi->nm_info)
2741                 return -ENOMEM;
2742
2743         err = init_node_manager(sbi);
2744         if (err)
2745                 return err;
2746
2747         err = init_free_nid_cache(sbi);
2748         if (err)
2749                 return err;
2750
2751         /* load free nid status from nat_bits table */
2752         load_free_nid_bitmap(sbi);
2753
2754         build_free_nids(sbi, true, true);
2755         return 0;
2756 }
2757
2758 void destroy_node_manager(struct f2fs_sb_info *sbi)
2759 {
2760         struct f2fs_nm_info *nm_i = NM_I(sbi);
2761         struct free_nid *i, *next_i;
2762         struct nat_entry *natvec[NATVEC_SIZE];
2763         struct nat_entry_set *setvec[SETVEC_SIZE];
2764         nid_t nid = 0;
2765         unsigned int found;
2766
2767         if (!nm_i)
2768                 return;
2769
2770         /* destroy free nid list */
2771         spin_lock(&nm_i->nid_list_lock);
2772         list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST],
2773                                                                         list) {
2774                 __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
2775                 spin_unlock(&nm_i->nid_list_lock);
2776                 kmem_cache_free(free_nid_slab, i);
2777                 spin_lock(&nm_i->nid_list_lock);
2778         }
2779         f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]);
2780         f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]);
2781         f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST]));
2782         spin_unlock(&nm_i->nid_list_lock);
2783
2784         /* destroy nat cache */
2785         down_write(&nm_i->nat_tree_lock);
2786         while ((found = __gang_lookup_nat_cache(nm_i,
2787                                         nid, NATVEC_SIZE, natvec))) {
2788                 unsigned idx;
2789
2790                 nid = nat_get_nid(natvec[found - 1]) + 1;
2791                 for (idx = 0; idx < found; idx++)
2792                         __del_from_nat_cache(nm_i, natvec[idx]);
2793         }
2794         f2fs_bug_on(sbi, nm_i->nat_cnt);
2795
2796         /* destroy nat set cache */
2797         nid = 0;
2798         while ((found = __gang_lookup_nat_set(nm_i,
2799                                         nid, SETVEC_SIZE, setvec))) {
2800                 unsigned idx;
2801
2802                 nid = setvec[found - 1]->set + 1;
2803                 for (idx = 0; idx < found; idx++) {
2804                         /* entry_cnt is not zero when cp_error has occurred */
2805                         f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
2806                         radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
2807                         kmem_cache_free(nat_entry_set_slab, setvec[idx]);
2808                 }
2809         }
2810         up_write(&nm_i->nat_tree_lock);
2811
2812         kvfree(nm_i->nat_block_bitmap);
2813         kvfree(nm_i->free_nid_bitmap);
2814         kvfree(nm_i->free_nid_count);
2815
2816         kfree(nm_i->nat_bitmap);
2817         kfree(nm_i->nat_bits);
2818 #ifdef CONFIG_F2FS_CHECK_FS
2819         kfree(nm_i->nat_bitmap_mir);
2820 #endif
2821         sbi->nm_info = NULL;
2822         kfree(nm_i);
2823 }
2824
2825 int __init create_node_manager_caches(void)
2826 {
2827         nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
2828                         sizeof(struct nat_entry));
2829         if (!nat_entry_slab)
2830                 goto fail;
2831
2832         free_nid_slab = f2fs_kmem_cache_create("free_nid",
2833                         sizeof(struct free_nid));
2834         if (!free_nid_slab)
2835                 goto destroy_nat_entry;
2836
2837         nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
2838                         sizeof(struct nat_entry_set));
2839         if (!nat_entry_set_slab)
2840                 goto destroy_free_nid;
2841         return 0;
2842
2843 destroy_free_nid:
2844         kmem_cache_destroy(free_nid_slab);
2845 destroy_nat_entry:
2846         kmem_cache_destroy(nat_entry_slab);
2847 fail:
2848         return -ENOMEM;
2849 }
2850
2851 void destroy_node_manager_caches(void)
2852 {
2853         kmem_cache_destroy(nat_entry_set_slab);
2854         kmem_cache_destroy(free_nid_slab);
2855         kmem_cache_destroy(nat_entry_slab);
2856 }