GNU Linux-libre 4.19.264-gnu1
[releases.git] / fs / nilfs2 / btree.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * btree.c - NILFS B-tree.
4  *
5  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6  *
7  * Written by Koji Sato.
8  */
9
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/pagevec.h>
14 #include "nilfs.h"
15 #include "page.h"
16 #include "btnode.h"
17 #include "btree.h"
18 #include "alloc.h"
19 #include "dat.h"
20
21 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
22
23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
24 {
25         struct nilfs_btree_path *path;
26         int level = NILFS_BTREE_LEVEL_DATA;
27
28         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
29         if (path == NULL)
30                 goto out;
31
32         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
33                 path[level].bp_bh = NULL;
34                 path[level].bp_sib_bh = NULL;
35                 path[level].bp_index = 0;
36                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
37                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
38                 path[level].bp_op = NULL;
39         }
40
41 out:
42         return path;
43 }
44
45 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
46 {
47         int level = NILFS_BTREE_LEVEL_DATA;
48
49         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
50                 brelse(path[level].bp_bh);
51
52         kmem_cache_free(nilfs_btree_path_cache, path);
53 }
54
55 /*
56  * B-tree node operations
57  */
58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
59                                      __u64 ptr, struct buffer_head **bhp)
60 {
61         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
62         struct address_space *btnc = btnc_inode->i_mapping;
63         struct buffer_head *bh;
64
65         bh = nilfs_btnode_create_block(btnc, ptr);
66         if (!bh)
67                 return -ENOMEM;
68
69         set_buffer_nilfs_volatile(bh);
70         *bhp = bh;
71         return 0;
72 }
73
74 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
75 {
76         return node->bn_flags;
77 }
78
79 static void
80 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
81 {
82         node->bn_flags = flags;
83 }
84
85 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
86 {
87         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
88 }
89
90 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
91 {
92         return node->bn_level;
93 }
94
95 static void
96 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
97 {
98         node->bn_level = level;
99 }
100
101 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
102 {
103         return le16_to_cpu(node->bn_nchildren);
104 }
105
106 static void
107 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
108 {
109         node->bn_nchildren = cpu_to_le16(nchildren);
110 }
111
112 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
113 {
114         return i_blocksize(btree->b_inode);
115 }
116
117 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
118 {
119         return btree->b_nchildren_per_block;
120 }
121
122 static __le64 *
123 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
124 {
125         return (__le64 *)((char *)(node + 1) +
126                           (nilfs_btree_node_root(node) ?
127                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
128 }
129
130 static __le64 *
131 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
132 {
133         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
134 }
135
136 static __u64
137 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
138 {
139         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
140 }
141
142 static void
143 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
144 {
145         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
146 }
147
148 static __u64
149 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
150                          int ncmax)
151 {
152         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
153 }
154
155 static void
156 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
157                          int ncmax)
158 {
159         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
160 }
161
162 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
163                                   int level, int nchildren, int ncmax,
164                                   const __u64 *keys, const __u64 *ptrs)
165 {
166         __le64 *dkeys;
167         __le64 *dptrs;
168         int i;
169
170         nilfs_btree_node_set_flags(node, flags);
171         nilfs_btree_node_set_level(node, level);
172         nilfs_btree_node_set_nchildren(node, nchildren);
173
174         dkeys = nilfs_btree_node_dkeys(node);
175         dptrs = nilfs_btree_node_dptrs(node, ncmax);
176         for (i = 0; i < nchildren; i++) {
177                 dkeys[i] = cpu_to_le64(keys[i]);
178                 dptrs[i] = cpu_to_le64(ptrs[i]);
179         }
180 }
181
182 /* Assume the buffer heads corresponding to left and right are locked. */
183 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
184                                        struct nilfs_btree_node *right,
185                                        int n, int lncmax, int rncmax)
186 {
187         __le64 *ldkeys, *rdkeys;
188         __le64 *ldptrs, *rdptrs;
189         int lnchildren, rnchildren;
190
191         ldkeys = nilfs_btree_node_dkeys(left);
192         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
193         lnchildren = nilfs_btree_node_get_nchildren(left);
194
195         rdkeys = nilfs_btree_node_dkeys(right);
196         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
197         rnchildren = nilfs_btree_node_get_nchildren(right);
198
199         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
200         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
201         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
202         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
203
204         lnchildren += n;
205         rnchildren -= n;
206         nilfs_btree_node_set_nchildren(left, lnchildren);
207         nilfs_btree_node_set_nchildren(right, rnchildren);
208 }
209
210 /* Assume that the buffer heads corresponding to left and right are locked. */
211 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
212                                         struct nilfs_btree_node *right,
213                                         int n, int lncmax, int rncmax)
214 {
215         __le64 *ldkeys, *rdkeys;
216         __le64 *ldptrs, *rdptrs;
217         int lnchildren, rnchildren;
218
219         ldkeys = nilfs_btree_node_dkeys(left);
220         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
221         lnchildren = nilfs_btree_node_get_nchildren(left);
222
223         rdkeys = nilfs_btree_node_dkeys(right);
224         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
225         rnchildren = nilfs_btree_node_get_nchildren(right);
226
227         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
228         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
229         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
230         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
231
232         lnchildren -= n;
233         rnchildren += n;
234         nilfs_btree_node_set_nchildren(left, lnchildren);
235         nilfs_btree_node_set_nchildren(right, rnchildren);
236 }
237
238 /* Assume that the buffer head corresponding to node is locked. */
239 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
240                                     __u64 key, __u64 ptr, int ncmax)
241 {
242         __le64 *dkeys;
243         __le64 *dptrs;
244         int nchildren;
245
246         dkeys = nilfs_btree_node_dkeys(node);
247         dptrs = nilfs_btree_node_dptrs(node, ncmax);
248         nchildren = nilfs_btree_node_get_nchildren(node);
249         if (index < nchildren) {
250                 memmove(dkeys + index + 1, dkeys + index,
251                         (nchildren - index) * sizeof(*dkeys));
252                 memmove(dptrs + index + 1, dptrs + index,
253                         (nchildren - index) * sizeof(*dptrs));
254         }
255         dkeys[index] = cpu_to_le64(key);
256         dptrs[index] = cpu_to_le64(ptr);
257         nchildren++;
258         nilfs_btree_node_set_nchildren(node, nchildren);
259 }
260
261 /* Assume that the buffer head corresponding to node is locked. */
262 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
263                                     __u64 *keyp, __u64 *ptrp, int ncmax)
264 {
265         __u64 key;
266         __u64 ptr;
267         __le64 *dkeys;
268         __le64 *dptrs;
269         int nchildren;
270
271         dkeys = nilfs_btree_node_dkeys(node);
272         dptrs = nilfs_btree_node_dptrs(node, ncmax);
273         key = le64_to_cpu(dkeys[index]);
274         ptr = le64_to_cpu(dptrs[index]);
275         nchildren = nilfs_btree_node_get_nchildren(node);
276         if (keyp != NULL)
277                 *keyp = key;
278         if (ptrp != NULL)
279                 *ptrp = ptr;
280
281         if (index < nchildren - 1) {
282                 memmove(dkeys + index, dkeys + index + 1,
283                         (nchildren - index - 1) * sizeof(*dkeys));
284                 memmove(dptrs + index, dptrs + index + 1,
285                         (nchildren - index - 1) * sizeof(*dptrs));
286         }
287         nchildren--;
288         nilfs_btree_node_set_nchildren(node, nchildren);
289 }
290
291 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
292                                    __u64 key, int *indexp)
293 {
294         __u64 nkey;
295         int index, low, high, s;
296
297         /* binary search */
298         low = 0;
299         high = nilfs_btree_node_get_nchildren(node) - 1;
300         index = 0;
301         s = 0;
302         while (low <= high) {
303                 index = (low + high) / 2;
304                 nkey = nilfs_btree_node_get_key(node, index);
305                 if (nkey == key) {
306                         s = 0;
307                         goto out;
308                 } else if (nkey < key) {
309                         low = index + 1;
310                         s = -1;
311                 } else {
312                         high = index - 1;
313                         s = 1;
314                 }
315         }
316
317         /* adjust index */
318         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
319                 if (s > 0 && index > 0)
320                         index--;
321         } else if (s < 0)
322                 index++;
323
324  out:
325         *indexp = index;
326
327         return s == 0;
328 }
329
330 /**
331  * nilfs_btree_node_broken - verify consistency of btree node
332  * @node: btree node block to be examined
333  * @size: node size (in bytes)
334  * @inode: host inode of btree
335  * @blocknr: block number
336  *
337  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
338  */
339 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
340                                    size_t size, struct inode *inode,
341                                    sector_t blocknr)
342 {
343         int level, flags, nchildren;
344         int ret = 0;
345
346         level = nilfs_btree_node_get_level(node);
347         flags = nilfs_btree_node_get_flags(node);
348         nchildren = nilfs_btree_node_get_nchildren(node);
349
350         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
351                      level >= NILFS_BTREE_LEVEL_MAX ||
352                      (flags & NILFS_BTREE_NODE_ROOT) ||
353                      nchildren < 0 ||
354                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
355                 nilfs_msg(inode->i_sb, KERN_CRIT,
356                           "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
357                           inode->i_ino, (unsigned long long)blocknr, level,
358                           flags, nchildren);
359                 ret = 1;
360         }
361         return ret;
362 }
363
364 /**
365  * nilfs_btree_root_broken - verify consistency of btree root node
366  * @node: btree root node to be examined
367  * @inode: host inode of btree
368  *
369  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
370  */
371 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
372                                    struct inode *inode)
373 {
374         int level, flags, nchildren;
375         int ret = 0;
376
377         level = nilfs_btree_node_get_level(node);
378         flags = nilfs_btree_node_get_flags(node);
379         nchildren = nilfs_btree_node_get_nchildren(node);
380
381         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
382                      level >= NILFS_BTREE_LEVEL_MAX ||
383                      nchildren < 0 ||
384                      nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
385                 nilfs_msg(inode->i_sb, KERN_CRIT,
386                           "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
387                           inode->i_ino, level, flags, nchildren);
388                 ret = 1;
389         }
390         return ret;
391 }
392
393 int nilfs_btree_broken_node_block(struct buffer_head *bh)
394 {
395         struct inode *inode;
396         int ret;
397
398         if (buffer_nilfs_checked(bh))
399                 return 0;
400
401         inode = bh->b_page->mapping->host;
402         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
403                                       bh->b_size, inode, bh->b_blocknr);
404         if (likely(!ret))
405                 set_buffer_nilfs_checked(bh);
406         return ret;
407 }
408
409 static struct nilfs_btree_node *
410 nilfs_btree_get_root(const struct nilfs_bmap *btree)
411 {
412         return (struct nilfs_btree_node *)btree->b_u.u_data;
413 }
414
415 static struct nilfs_btree_node *
416 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
417 {
418         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
419 }
420
421 static struct nilfs_btree_node *
422 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
423 {
424         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
425 }
426
427 static int nilfs_btree_height(const struct nilfs_bmap *btree)
428 {
429         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
430 }
431
432 static struct nilfs_btree_node *
433 nilfs_btree_get_node(const struct nilfs_bmap *btree,
434                      const struct nilfs_btree_path *path,
435                      int level, int *ncmaxp)
436 {
437         struct nilfs_btree_node *node;
438
439         if (level == nilfs_btree_height(btree) - 1) {
440                 node = nilfs_btree_get_root(btree);
441                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
442         } else {
443                 node = nilfs_btree_get_nonroot_node(path, level);
444                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
445         }
446         return node;
447 }
448
449 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
450                                 struct nilfs_btree_node *node, int level)
451 {
452         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
453                 dump_stack();
454                 nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
455                           "btree level mismatch (ino=%lu): %d != %d",
456                           btree->b_inode->i_ino,
457                           nilfs_btree_node_get_level(node), level);
458                 return 1;
459         }
460         return 0;
461 }
462
463 struct nilfs_btree_readahead_info {
464         struct nilfs_btree_node *node;  /* parent node */
465         int max_ra_blocks;              /* max nof blocks to read ahead */
466         int index;                      /* current index on the parent node */
467         int ncmax;                      /* nof children in the parent node */
468 };
469
470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471                                    struct buffer_head **bhp,
472                                    const struct nilfs_btree_readahead_info *ra)
473 {
474         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
475         struct address_space *btnc = btnc_inode->i_mapping;
476         struct buffer_head *bh, *ra_bh;
477         sector_t submit_ptr = 0;
478         int ret;
479
480         ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
481                                         &submit_ptr);
482         if (ret) {
483                 if (ret != -EEXIST)
484                         return ret;
485                 goto out_check;
486         }
487
488         if (ra) {
489                 int i, n;
490                 __u64 ptr2;
491
492                 /* read ahead sibling nodes */
493                 for (n = ra->max_ra_blocks, i = ra->index + 1;
494                      n > 0 && i < ra->ncmax; n--, i++) {
495                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
496
497                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
498                                                         REQ_OP_READ, REQ_RAHEAD,
499                                                         &ra_bh, &submit_ptr);
500                         if (likely(!ret || ret == -EEXIST))
501                                 brelse(ra_bh);
502                         else if (ret != -EBUSY)
503                                 break;
504                         if (!buffer_locked(bh))
505                                 goto out_no_wait;
506                 }
507         }
508
509         wait_on_buffer(bh);
510
511  out_no_wait:
512         if (!buffer_uptodate(bh)) {
513                 nilfs_msg(btree->b_inode->i_sb, KERN_ERR,
514                           "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
515                           btree->b_inode->i_ino, (unsigned long long)ptr);
516                 brelse(bh);
517                 return -EIO;
518         }
519
520  out_check:
521         if (nilfs_btree_broken_node_block(bh)) {
522                 clear_buffer_uptodate(bh);
523                 brelse(bh);
524                 return -EINVAL;
525         }
526
527         *bhp = bh;
528         return 0;
529 }
530
531 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
532                                    struct buffer_head **bhp)
533 {
534         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
535 }
536
537 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
538                                  struct nilfs_btree_path *path,
539                                  __u64 key, __u64 *ptrp, int minlevel,
540                                  int readahead)
541 {
542         struct nilfs_btree_node *node;
543         struct nilfs_btree_readahead_info p, *ra;
544         __u64 ptr;
545         int level, index, found, ncmax, ret;
546
547         node = nilfs_btree_get_root(btree);
548         level = nilfs_btree_node_get_level(node);
549         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
550                 return -ENOENT;
551
552         found = nilfs_btree_node_lookup(node, key, &index);
553         ptr = nilfs_btree_node_get_ptr(node, index,
554                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
555         path[level].bp_bh = NULL;
556         path[level].bp_index = index;
557
558         ncmax = nilfs_btree_nchildren_per_block(btree);
559
560         while (--level >= minlevel) {
561                 ra = NULL;
562                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
563                         p.node = nilfs_btree_get_node(btree, path, level + 1,
564                                                       &p.ncmax);
565                         p.index = index;
566                         p.max_ra_blocks = 7;
567                         ra = &p;
568                 }
569                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
570                                               ra);
571                 if (ret < 0)
572                         return ret;
573
574                 node = nilfs_btree_get_nonroot_node(path, level);
575                 if (nilfs_btree_bad_node(btree, node, level))
576                         return -EINVAL;
577                 if (!found)
578                         found = nilfs_btree_node_lookup(node, key, &index);
579                 else
580                         index = 0;
581                 if (index < ncmax) {
582                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
583                 } else {
584                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
585                         /* insert */
586                         ptr = NILFS_BMAP_INVALID_PTR;
587                 }
588                 path[level].bp_index = index;
589         }
590         if (!found)
591                 return -ENOENT;
592
593         if (ptrp != NULL)
594                 *ptrp = ptr;
595
596         return 0;
597 }
598
599 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
600                                       struct nilfs_btree_path *path,
601                                       __u64 *keyp, __u64 *ptrp)
602 {
603         struct nilfs_btree_node *node;
604         __u64 ptr;
605         int index, level, ncmax, ret;
606
607         node = nilfs_btree_get_root(btree);
608         index = nilfs_btree_node_get_nchildren(node) - 1;
609         if (index < 0)
610                 return -ENOENT;
611         level = nilfs_btree_node_get_level(node);
612         ptr = nilfs_btree_node_get_ptr(node, index,
613                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
614         path[level].bp_bh = NULL;
615         path[level].bp_index = index;
616         ncmax = nilfs_btree_nchildren_per_block(btree);
617
618         for (level--; level > 0; level--) {
619                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
620                 if (ret < 0)
621                         return ret;
622                 node = nilfs_btree_get_nonroot_node(path, level);
623                 if (nilfs_btree_bad_node(btree, node, level))
624                         return -EINVAL;
625                 index = nilfs_btree_node_get_nchildren(node) - 1;
626                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
627                 path[level].bp_index = index;
628         }
629
630         if (keyp != NULL)
631                 *keyp = nilfs_btree_node_get_key(node, index);
632         if (ptrp != NULL)
633                 *ptrp = ptr;
634
635         return 0;
636 }
637
638 /**
639  * nilfs_btree_get_next_key - get next valid key from btree path array
640  * @btree: bmap struct of btree
641  * @path: array of nilfs_btree_path struct
642  * @minlevel: start level
643  * @nextkey: place to store the next valid key
644  *
645  * Return Value: If a next key was found, 0 is returned. Otherwise,
646  * -ENOENT is returned.
647  */
648 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
649                                     const struct nilfs_btree_path *path,
650                                     int minlevel, __u64 *nextkey)
651 {
652         struct nilfs_btree_node *node;
653         int maxlevel = nilfs_btree_height(btree) - 1;
654         int index, next_adj, level;
655
656         /* Next index is already set to bp_index for leaf nodes. */
657         next_adj = 0;
658         for (level = minlevel; level <= maxlevel; level++) {
659                 if (level == maxlevel)
660                         node = nilfs_btree_get_root(btree);
661                 else
662                         node = nilfs_btree_get_nonroot_node(path, level);
663
664                 index = path[level].bp_index + next_adj;
665                 if (index < nilfs_btree_node_get_nchildren(node)) {
666                         /* Next key is in this node */
667                         *nextkey = nilfs_btree_node_get_key(node, index);
668                         return 0;
669                 }
670                 /* For non-leaf nodes, next index is stored at bp_index + 1. */
671                 next_adj = 1;
672         }
673         return -ENOENT;
674 }
675
676 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
677                               __u64 key, int level, __u64 *ptrp)
678 {
679         struct nilfs_btree_path *path;
680         int ret;
681
682         path = nilfs_btree_alloc_path();
683         if (path == NULL)
684                 return -ENOMEM;
685
686         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
687
688         nilfs_btree_free_path(path);
689
690         return ret;
691 }
692
693 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
694                                      __u64 key, __u64 *ptrp,
695                                      unsigned int maxblocks)
696 {
697         struct nilfs_btree_path *path;
698         struct nilfs_btree_node *node;
699         struct inode *dat = NULL;
700         __u64 ptr, ptr2;
701         sector_t blocknr;
702         int level = NILFS_BTREE_LEVEL_NODE_MIN;
703         int ret, cnt, index, maxlevel, ncmax;
704         struct nilfs_btree_readahead_info p;
705
706         path = nilfs_btree_alloc_path();
707         if (path == NULL)
708                 return -ENOMEM;
709
710         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
711         if (ret < 0)
712                 goto out;
713
714         if (NILFS_BMAP_USE_VBN(btree)) {
715                 dat = nilfs_bmap_get_dat(btree);
716                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
717                 if (ret < 0)
718                         goto out;
719                 ptr = blocknr;
720         }
721         cnt = 1;
722         if (cnt == maxblocks)
723                 goto end;
724
725         maxlevel = nilfs_btree_height(btree) - 1;
726         node = nilfs_btree_get_node(btree, path, level, &ncmax);
727         index = path[level].bp_index + 1;
728         for (;;) {
729                 while (index < nilfs_btree_node_get_nchildren(node)) {
730                         if (nilfs_btree_node_get_key(node, index) !=
731                             key + cnt)
732                                 goto end;
733                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
734                         if (dat) {
735                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
736                                 if (ret < 0)
737                                         goto out;
738                                 ptr2 = blocknr;
739                         }
740                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
741                                 goto end;
742                         index++;
743                         continue;
744                 }
745                 if (level == maxlevel)
746                         break;
747
748                 /* look-up right sibling node */
749                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
750                 p.index = path[level + 1].bp_index + 1;
751                 p.max_ra_blocks = 7;
752                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
753                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
754                         break;
755                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
756                 path[level + 1].bp_index = p.index;
757
758                 brelse(path[level].bp_bh);
759                 path[level].bp_bh = NULL;
760
761                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
762                                               &p);
763                 if (ret < 0)
764                         goto out;
765                 node = nilfs_btree_get_nonroot_node(path, level);
766                 ncmax = nilfs_btree_nchildren_per_block(btree);
767                 index = 0;
768                 path[level].bp_index = index;
769         }
770  end:
771         *ptrp = ptr;
772         ret = cnt;
773  out:
774         nilfs_btree_free_path(path);
775         return ret;
776 }
777
778 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
779                                     struct nilfs_btree_path *path,
780                                     int level, __u64 key)
781 {
782         if (level < nilfs_btree_height(btree) - 1) {
783                 do {
784                         nilfs_btree_node_set_key(
785                                 nilfs_btree_get_nonroot_node(path, level),
786                                 path[level].bp_index, key);
787                         if (!buffer_dirty(path[level].bp_bh))
788                                 mark_buffer_dirty(path[level].bp_bh);
789                 } while ((path[level].bp_index == 0) &&
790                          (++level < nilfs_btree_height(btree) - 1));
791         }
792
793         /* root */
794         if (level == nilfs_btree_height(btree) - 1) {
795                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
796                                          path[level].bp_index, key);
797         }
798 }
799
800 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
801                                   struct nilfs_btree_path *path,
802                                   int level, __u64 *keyp, __u64 *ptrp)
803 {
804         struct nilfs_btree_node *node;
805         int ncblk;
806
807         if (level < nilfs_btree_height(btree) - 1) {
808                 node = nilfs_btree_get_nonroot_node(path, level);
809                 ncblk = nilfs_btree_nchildren_per_block(btree);
810                 nilfs_btree_node_insert(node, path[level].bp_index,
811                                         *keyp, *ptrp, ncblk);
812                 if (!buffer_dirty(path[level].bp_bh))
813                         mark_buffer_dirty(path[level].bp_bh);
814
815                 if (path[level].bp_index == 0)
816                         nilfs_btree_promote_key(btree, path, level + 1,
817                                                 nilfs_btree_node_get_key(node,
818                                                                          0));
819         } else {
820                 node = nilfs_btree_get_root(btree);
821                 nilfs_btree_node_insert(node, path[level].bp_index,
822                                         *keyp, *ptrp,
823                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
824         }
825 }
826
827 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
828                                    struct nilfs_btree_path *path,
829                                    int level, __u64 *keyp, __u64 *ptrp)
830 {
831         struct nilfs_btree_node *node, *left;
832         int nchildren, lnchildren, n, move, ncblk;
833
834         node = nilfs_btree_get_nonroot_node(path, level);
835         left = nilfs_btree_get_sib_node(path, level);
836         nchildren = nilfs_btree_node_get_nchildren(node);
837         lnchildren = nilfs_btree_node_get_nchildren(left);
838         ncblk = nilfs_btree_nchildren_per_block(btree);
839         move = 0;
840
841         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
842         if (n > path[level].bp_index) {
843                 /* move insert point */
844                 n--;
845                 move = 1;
846         }
847
848         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
849
850         if (!buffer_dirty(path[level].bp_bh))
851                 mark_buffer_dirty(path[level].bp_bh);
852         if (!buffer_dirty(path[level].bp_sib_bh))
853                 mark_buffer_dirty(path[level].bp_sib_bh);
854
855         nilfs_btree_promote_key(btree, path, level + 1,
856                                 nilfs_btree_node_get_key(node, 0));
857
858         if (move) {
859                 brelse(path[level].bp_bh);
860                 path[level].bp_bh = path[level].bp_sib_bh;
861                 path[level].bp_sib_bh = NULL;
862                 path[level].bp_index += lnchildren;
863                 path[level + 1].bp_index--;
864         } else {
865                 brelse(path[level].bp_sib_bh);
866                 path[level].bp_sib_bh = NULL;
867                 path[level].bp_index -= n;
868         }
869
870         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
871 }
872
873 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
874                                     struct nilfs_btree_path *path,
875                                     int level, __u64 *keyp, __u64 *ptrp)
876 {
877         struct nilfs_btree_node *node, *right;
878         int nchildren, rnchildren, n, move, ncblk;
879
880         node = nilfs_btree_get_nonroot_node(path, level);
881         right = nilfs_btree_get_sib_node(path, level);
882         nchildren = nilfs_btree_node_get_nchildren(node);
883         rnchildren = nilfs_btree_node_get_nchildren(right);
884         ncblk = nilfs_btree_nchildren_per_block(btree);
885         move = 0;
886
887         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
888         if (n > nchildren - path[level].bp_index) {
889                 /* move insert point */
890                 n--;
891                 move = 1;
892         }
893
894         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
895
896         if (!buffer_dirty(path[level].bp_bh))
897                 mark_buffer_dirty(path[level].bp_bh);
898         if (!buffer_dirty(path[level].bp_sib_bh))
899                 mark_buffer_dirty(path[level].bp_sib_bh);
900
901         path[level + 1].bp_index++;
902         nilfs_btree_promote_key(btree, path, level + 1,
903                                 nilfs_btree_node_get_key(right, 0));
904         path[level + 1].bp_index--;
905
906         if (move) {
907                 brelse(path[level].bp_bh);
908                 path[level].bp_bh = path[level].bp_sib_bh;
909                 path[level].bp_sib_bh = NULL;
910                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
911                 path[level + 1].bp_index++;
912         } else {
913                 brelse(path[level].bp_sib_bh);
914                 path[level].bp_sib_bh = NULL;
915         }
916
917         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
918 }
919
920 static void nilfs_btree_split(struct nilfs_bmap *btree,
921                               struct nilfs_btree_path *path,
922                               int level, __u64 *keyp, __u64 *ptrp)
923 {
924         struct nilfs_btree_node *node, *right;
925         int nchildren, n, move, ncblk;
926
927         node = nilfs_btree_get_nonroot_node(path, level);
928         right = nilfs_btree_get_sib_node(path, level);
929         nchildren = nilfs_btree_node_get_nchildren(node);
930         ncblk = nilfs_btree_nchildren_per_block(btree);
931         move = 0;
932
933         n = (nchildren + 1) / 2;
934         if (n > nchildren - path[level].bp_index) {
935                 n--;
936                 move = 1;
937         }
938
939         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
940
941         if (!buffer_dirty(path[level].bp_bh))
942                 mark_buffer_dirty(path[level].bp_bh);
943         if (!buffer_dirty(path[level].bp_sib_bh))
944                 mark_buffer_dirty(path[level].bp_sib_bh);
945
946         if (move) {
947                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
948                 nilfs_btree_node_insert(right, path[level].bp_index,
949                                         *keyp, *ptrp, ncblk);
950
951                 *keyp = nilfs_btree_node_get_key(right, 0);
952                 *ptrp = path[level].bp_newreq.bpr_ptr;
953
954                 brelse(path[level].bp_bh);
955                 path[level].bp_bh = path[level].bp_sib_bh;
956                 path[level].bp_sib_bh = NULL;
957         } else {
958                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
959
960                 *keyp = nilfs_btree_node_get_key(right, 0);
961                 *ptrp = path[level].bp_newreq.bpr_ptr;
962
963                 brelse(path[level].bp_sib_bh);
964                 path[level].bp_sib_bh = NULL;
965         }
966
967         path[level + 1].bp_index++;
968 }
969
970 static void nilfs_btree_grow(struct nilfs_bmap *btree,
971                              struct nilfs_btree_path *path,
972                              int level, __u64 *keyp, __u64 *ptrp)
973 {
974         struct nilfs_btree_node *root, *child;
975         int n, ncblk;
976
977         root = nilfs_btree_get_root(btree);
978         child = nilfs_btree_get_sib_node(path, level);
979         ncblk = nilfs_btree_nchildren_per_block(btree);
980
981         n = nilfs_btree_node_get_nchildren(root);
982
983         nilfs_btree_node_move_right(root, child, n,
984                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
985         nilfs_btree_node_set_level(root, level + 1);
986
987         if (!buffer_dirty(path[level].bp_sib_bh))
988                 mark_buffer_dirty(path[level].bp_sib_bh);
989
990         path[level].bp_bh = path[level].bp_sib_bh;
991         path[level].bp_sib_bh = NULL;
992
993         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
994
995         *keyp = nilfs_btree_node_get_key(child, 0);
996         *ptrp = path[level].bp_newreq.bpr_ptr;
997 }
998
999 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1000                                    const struct nilfs_btree_path *path)
1001 {
1002         struct nilfs_btree_node *node;
1003         int level, ncmax;
1004
1005         if (path == NULL)
1006                 return NILFS_BMAP_INVALID_PTR;
1007
1008         /* left sibling */
1009         level = NILFS_BTREE_LEVEL_NODE_MIN;
1010         if (path[level].bp_index > 0) {
1011                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1012                 return nilfs_btree_node_get_ptr(node,
1013                                                 path[level].bp_index - 1,
1014                                                 ncmax);
1015         }
1016
1017         /* parent */
1018         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1019         if (level <= nilfs_btree_height(btree) - 1) {
1020                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1021                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1022                                                 ncmax);
1023         }
1024
1025         return NILFS_BMAP_INVALID_PTR;
1026 }
1027
1028 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1029                                        const struct nilfs_btree_path *path,
1030                                        __u64 key)
1031 {
1032         __u64 ptr;
1033
1034         ptr = nilfs_bmap_find_target_seq(btree, key);
1035         if (ptr != NILFS_BMAP_INVALID_PTR)
1036                 /* sequential access */
1037                 return ptr;
1038
1039         ptr = nilfs_btree_find_near(btree, path);
1040         if (ptr != NILFS_BMAP_INVALID_PTR)
1041                 /* near */
1042                 return ptr;
1043
1044         /* block group */
1045         return nilfs_bmap_find_target_in_group(btree);
1046 }
1047
1048 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1049                                       struct nilfs_btree_path *path,
1050                                       int *levelp, __u64 key, __u64 ptr,
1051                                       struct nilfs_bmap_stats *stats)
1052 {
1053         struct buffer_head *bh;
1054         struct nilfs_btree_node *node, *parent, *sib;
1055         __u64 sibptr;
1056         int pindex, level, ncmax, ncblk, ret;
1057         struct inode *dat = NULL;
1058
1059         stats->bs_nblocks = 0;
1060         level = NILFS_BTREE_LEVEL_DATA;
1061
1062         /* allocate a new ptr for data block */
1063         if (NILFS_BMAP_USE_VBN(btree)) {
1064                 path[level].bp_newreq.bpr_ptr =
1065                         nilfs_btree_find_target_v(btree, path, key);
1066                 dat = nilfs_bmap_get_dat(btree);
1067         }
1068
1069         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1070         if (ret < 0)
1071                 goto err_out_data;
1072
1073         ncblk = nilfs_btree_nchildren_per_block(btree);
1074
1075         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1076              level < nilfs_btree_height(btree) - 1;
1077              level++) {
1078                 node = nilfs_btree_get_nonroot_node(path, level);
1079                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1080                         path[level].bp_op = nilfs_btree_do_insert;
1081                         stats->bs_nblocks++;
1082                         goto out;
1083                 }
1084
1085                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1086                 pindex = path[level + 1].bp_index;
1087
1088                 /* left sibling */
1089                 if (pindex > 0) {
1090                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1091                                                           ncmax);
1092                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1093                         if (ret < 0)
1094                                 goto err_out_child_node;
1095                         sib = (struct nilfs_btree_node *)bh->b_data;
1096                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1097                                 path[level].bp_sib_bh = bh;
1098                                 path[level].bp_op = nilfs_btree_carry_left;
1099                                 stats->bs_nblocks++;
1100                                 goto out;
1101                         } else {
1102                                 brelse(bh);
1103                         }
1104                 }
1105
1106                 /* right sibling */
1107                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1108                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1109                                                           ncmax);
1110                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1111                         if (ret < 0)
1112                                 goto err_out_child_node;
1113                         sib = (struct nilfs_btree_node *)bh->b_data;
1114                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1115                                 path[level].bp_sib_bh = bh;
1116                                 path[level].bp_op = nilfs_btree_carry_right;
1117                                 stats->bs_nblocks++;
1118                                 goto out;
1119                         } else {
1120                                 brelse(bh);
1121                         }
1122                 }
1123
1124                 /* split */
1125                 path[level].bp_newreq.bpr_ptr =
1126                         path[level - 1].bp_newreq.bpr_ptr + 1;
1127                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1128                                                    &path[level].bp_newreq, dat);
1129                 if (ret < 0)
1130                         goto err_out_child_node;
1131                 ret = nilfs_btree_get_new_block(btree,
1132                                                 path[level].bp_newreq.bpr_ptr,
1133                                                 &bh);
1134                 if (ret < 0)
1135                         goto err_out_curr_node;
1136
1137                 stats->bs_nblocks++;
1138
1139                 sib = (struct nilfs_btree_node *)bh->b_data;
1140                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1141                 path[level].bp_sib_bh = bh;
1142                 path[level].bp_op = nilfs_btree_split;
1143         }
1144
1145         /* root */
1146         node = nilfs_btree_get_root(btree);
1147         if (nilfs_btree_node_get_nchildren(node) <
1148             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1149                 path[level].bp_op = nilfs_btree_do_insert;
1150                 stats->bs_nblocks++;
1151                 goto out;
1152         }
1153
1154         /* grow */
1155         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1156         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1157         if (ret < 0)
1158                 goto err_out_child_node;
1159         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1160                                         &bh);
1161         if (ret < 0)
1162                 goto err_out_curr_node;
1163
1164         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1165                               0, level, 0, ncblk, NULL, NULL);
1166         path[level].bp_sib_bh = bh;
1167         path[level].bp_op = nilfs_btree_grow;
1168
1169         level++;
1170         path[level].bp_op = nilfs_btree_do_insert;
1171
1172         /* a newly-created node block and a data block are added */
1173         stats->bs_nblocks += 2;
1174
1175         /* success */
1176  out:
1177         *levelp = level;
1178         return ret;
1179
1180         /* error */
1181  err_out_curr_node:
1182         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1183  err_out_child_node:
1184         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1185                 nilfs_btnode_delete(path[level].bp_sib_bh);
1186                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1187
1188         }
1189
1190         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1191  err_out_data:
1192         *levelp = level;
1193         stats->bs_nblocks = 0;
1194         return ret;
1195 }
1196
1197 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1198                                       struct nilfs_btree_path *path,
1199                                       int maxlevel, __u64 key, __u64 ptr)
1200 {
1201         struct inode *dat = NULL;
1202         int level;
1203
1204         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1205         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1206         if (NILFS_BMAP_USE_VBN(btree)) {
1207                 nilfs_bmap_set_target_v(btree, key, ptr);
1208                 dat = nilfs_bmap_get_dat(btree);
1209         }
1210
1211         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1212                 nilfs_bmap_commit_alloc_ptr(btree,
1213                                             &path[level - 1].bp_newreq, dat);
1214                 path[level].bp_op(btree, path, level, &key, &ptr);
1215         }
1216
1217         if (!nilfs_bmap_dirty(btree))
1218                 nilfs_bmap_set_dirty(btree);
1219 }
1220
1221 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1222 {
1223         struct nilfs_btree_path *path;
1224         struct nilfs_bmap_stats stats;
1225         int level, ret;
1226
1227         path = nilfs_btree_alloc_path();
1228         if (path == NULL)
1229                 return -ENOMEM;
1230
1231         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1232                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1233         if (ret != -ENOENT) {
1234                 if (ret == 0)
1235                         ret = -EEXIST;
1236                 goto out;
1237         }
1238
1239         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1240         if (ret < 0)
1241                 goto out;
1242         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1243         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1244
1245  out:
1246         nilfs_btree_free_path(path);
1247         return ret;
1248 }
1249
1250 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1251                                   struct nilfs_btree_path *path,
1252                                   int level, __u64 *keyp, __u64 *ptrp)
1253 {
1254         struct nilfs_btree_node *node;
1255         int ncblk;
1256
1257         if (level < nilfs_btree_height(btree) - 1) {
1258                 node = nilfs_btree_get_nonroot_node(path, level);
1259                 ncblk = nilfs_btree_nchildren_per_block(btree);
1260                 nilfs_btree_node_delete(node, path[level].bp_index,
1261                                         keyp, ptrp, ncblk);
1262                 if (!buffer_dirty(path[level].bp_bh))
1263                         mark_buffer_dirty(path[level].bp_bh);
1264                 if (path[level].bp_index == 0)
1265                         nilfs_btree_promote_key(btree, path, level + 1,
1266                                 nilfs_btree_node_get_key(node, 0));
1267         } else {
1268                 node = nilfs_btree_get_root(btree);
1269                 nilfs_btree_node_delete(node, path[level].bp_index,
1270                                         keyp, ptrp,
1271                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1272         }
1273 }
1274
1275 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1276                                     struct nilfs_btree_path *path,
1277                                     int level, __u64 *keyp, __u64 *ptrp)
1278 {
1279         struct nilfs_btree_node *node, *left;
1280         int nchildren, lnchildren, n, ncblk;
1281
1282         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1283
1284         node = nilfs_btree_get_nonroot_node(path, level);
1285         left = nilfs_btree_get_sib_node(path, level);
1286         nchildren = nilfs_btree_node_get_nchildren(node);
1287         lnchildren = nilfs_btree_node_get_nchildren(left);
1288         ncblk = nilfs_btree_nchildren_per_block(btree);
1289
1290         n = (nchildren + lnchildren) / 2 - nchildren;
1291
1292         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1293
1294         if (!buffer_dirty(path[level].bp_bh))
1295                 mark_buffer_dirty(path[level].bp_bh);
1296         if (!buffer_dirty(path[level].bp_sib_bh))
1297                 mark_buffer_dirty(path[level].bp_sib_bh);
1298
1299         nilfs_btree_promote_key(btree, path, level + 1,
1300                                 nilfs_btree_node_get_key(node, 0));
1301
1302         brelse(path[level].bp_sib_bh);
1303         path[level].bp_sib_bh = NULL;
1304         path[level].bp_index += n;
1305 }
1306
1307 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1308                                      struct nilfs_btree_path *path,
1309                                      int level, __u64 *keyp, __u64 *ptrp)
1310 {
1311         struct nilfs_btree_node *node, *right;
1312         int nchildren, rnchildren, n, ncblk;
1313
1314         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1315
1316         node = nilfs_btree_get_nonroot_node(path, level);
1317         right = nilfs_btree_get_sib_node(path, level);
1318         nchildren = nilfs_btree_node_get_nchildren(node);
1319         rnchildren = nilfs_btree_node_get_nchildren(right);
1320         ncblk = nilfs_btree_nchildren_per_block(btree);
1321
1322         n = (nchildren + rnchildren) / 2 - nchildren;
1323
1324         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1325
1326         if (!buffer_dirty(path[level].bp_bh))
1327                 mark_buffer_dirty(path[level].bp_bh);
1328         if (!buffer_dirty(path[level].bp_sib_bh))
1329                 mark_buffer_dirty(path[level].bp_sib_bh);
1330
1331         path[level + 1].bp_index++;
1332         nilfs_btree_promote_key(btree, path, level + 1,
1333                                 nilfs_btree_node_get_key(right, 0));
1334         path[level + 1].bp_index--;
1335
1336         brelse(path[level].bp_sib_bh);
1337         path[level].bp_sib_bh = NULL;
1338 }
1339
1340 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1341                                     struct nilfs_btree_path *path,
1342                                     int level, __u64 *keyp, __u64 *ptrp)
1343 {
1344         struct nilfs_btree_node *node, *left;
1345         int n, ncblk;
1346
1347         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1348
1349         node = nilfs_btree_get_nonroot_node(path, level);
1350         left = nilfs_btree_get_sib_node(path, level);
1351         ncblk = nilfs_btree_nchildren_per_block(btree);
1352
1353         n = nilfs_btree_node_get_nchildren(node);
1354
1355         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1356
1357         if (!buffer_dirty(path[level].bp_sib_bh))
1358                 mark_buffer_dirty(path[level].bp_sib_bh);
1359
1360         nilfs_btnode_delete(path[level].bp_bh);
1361         path[level].bp_bh = path[level].bp_sib_bh;
1362         path[level].bp_sib_bh = NULL;
1363         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1364 }
1365
1366 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1367                                      struct nilfs_btree_path *path,
1368                                      int level, __u64 *keyp, __u64 *ptrp)
1369 {
1370         struct nilfs_btree_node *node, *right;
1371         int n, ncblk;
1372
1373         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1374
1375         node = nilfs_btree_get_nonroot_node(path, level);
1376         right = nilfs_btree_get_sib_node(path, level);
1377         ncblk = nilfs_btree_nchildren_per_block(btree);
1378
1379         n = nilfs_btree_node_get_nchildren(right);
1380
1381         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1382
1383         if (!buffer_dirty(path[level].bp_bh))
1384                 mark_buffer_dirty(path[level].bp_bh);
1385
1386         nilfs_btnode_delete(path[level].bp_sib_bh);
1387         path[level].bp_sib_bh = NULL;
1388         path[level + 1].bp_index++;
1389 }
1390
1391 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1392                                struct nilfs_btree_path *path,
1393                                int level, __u64 *keyp, __u64 *ptrp)
1394 {
1395         struct nilfs_btree_node *root, *child;
1396         int n, ncblk;
1397
1398         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1399
1400         root = nilfs_btree_get_root(btree);
1401         child = nilfs_btree_get_nonroot_node(path, level);
1402         ncblk = nilfs_btree_nchildren_per_block(btree);
1403
1404         nilfs_btree_node_delete(root, 0, NULL, NULL,
1405                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1406         nilfs_btree_node_set_level(root, level);
1407         n = nilfs_btree_node_get_nchildren(child);
1408         nilfs_btree_node_move_left(root, child, n,
1409                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1410
1411         nilfs_btnode_delete(path[level].bp_bh);
1412         path[level].bp_bh = NULL;
1413 }
1414
1415 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1416                             struct nilfs_btree_path *path,
1417                             int level, __u64 *keyp, __u64 *ptrp)
1418 {
1419 }
1420
1421 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1422                                       struct nilfs_btree_path *path,
1423                                       int *levelp,
1424                                       struct nilfs_bmap_stats *stats,
1425                                       struct inode *dat)
1426 {
1427         struct buffer_head *bh;
1428         struct nilfs_btree_node *node, *parent, *sib;
1429         __u64 sibptr;
1430         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1431
1432         ret = 0;
1433         stats->bs_nblocks = 0;
1434         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1435         ncblk = nilfs_btree_nchildren_per_block(btree);
1436
1437         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1438              level < nilfs_btree_height(btree) - 1;
1439              level++) {
1440                 node = nilfs_btree_get_nonroot_node(path, level);
1441                 path[level].bp_oldreq.bpr_ptr =
1442                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1443                 ret = nilfs_bmap_prepare_end_ptr(btree,
1444                                                  &path[level].bp_oldreq, dat);
1445                 if (ret < 0)
1446                         goto err_out_child_node;
1447
1448                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1449                         path[level].bp_op = nilfs_btree_do_delete;
1450                         stats->bs_nblocks++;
1451                         goto out;
1452                 }
1453
1454                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1455                 pindex = path[level + 1].bp_index;
1456                 dindex = pindex;
1457
1458                 if (pindex > 0) {
1459                         /* left sibling */
1460                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1461                                                           ncmax);
1462                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1463                         if (ret < 0)
1464                                 goto err_out_curr_node;
1465                         sib = (struct nilfs_btree_node *)bh->b_data;
1466                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1467                                 path[level].bp_sib_bh = bh;
1468                                 path[level].bp_op = nilfs_btree_borrow_left;
1469                                 stats->bs_nblocks++;
1470                                 goto out;
1471                         } else {
1472                                 path[level].bp_sib_bh = bh;
1473                                 path[level].bp_op = nilfs_btree_concat_left;
1474                                 stats->bs_nblocks++;
1475                                 /* continue; */
1476                         }
1477                 } else if (pindex <
1478                            nilfs_btree_node_get_nchildren(parent) - 1) {
1479                         /* right sibling */
1480                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1481                                                           ncmax);
1482                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1483                         if (ret < 0)
1484                                 goto err_out_curr_node;
1485                         sib = (struct nilfs_btree_node *)bh->b_data;
1486                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1487                                 path[level].bp_sib_bh = bh;
1488                                 path[level].bp_op = nilfs_btree_borrow_right;
1489                                 stats->bs_nblocks++;
1490                                 goto out;
1491                         } else {
1492                                 path[level].bp_sib_bh = bh;
1493                                 path[level].bp_op = nilfs_btree_concat_right;
1494                                 stats->bs_nblocks++;
1495                                 /*
1496                                  * When merging right sibling node
1497                                  * into the current node, pointer to
1498                                  * the right sibling node must be
1499                                  * terminated instead.  The adjustment
1500                                  * below is required for that.
1501                                  */
1502                                 dindex = pindex + 1;
1503                                 /* continue; */
1504                         }
1505                 } else {
1506                         /* no siblings */
1507                         /* the only child of the root node */
1508                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1509                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1510                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1511                                 path[level].bp_op = nilfs_btree_shrink;
1512                                 stats->bs_nblocks += 2;
1513                                 level++;
1514                                 path[level].bp_op = nilfs_btree_nop;
1515                                 goto shrink_root_child;
1516                         } else {
1517                                 path[level].bp_op = nilfs_btree_do_delete;
1518                                 stats->bs_nblocks++;
1519                                 goto out;
1520                         }
1521                 }
1522         }
1523
1524         /* child of the root node is deleted */
1525         path[level].bp_op = nilfs_btree_do_delete;
1526         stats->bs_nblocks++;
1527
1528 shrink_root_child:
1529         node = nilfs_btree_get_root(btree);
1530         path[level].bp_oldreq.bpr_ptr =
1531                 nilfs_btree_node_get_ptr(node, dindex,
1532                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1533
1534         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1535         if (ret < 0)
1536                 goto err_out_child_node;
1537
1538         /* success */
1539  out:
1540         *levelp = level;
1541         return ret;
1542
1543         /* error */
1544  err_out_curr_node:
1545         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1546  err_out_child_node:
1547         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1548                 brelse(path[level].bp_sib_bh);
1549                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1550         }
1551         *levelp = level;
1552         stats->bs_nblocks = 0;
1553         return ret;
1554 }
1555
1556 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1557                                       struct nilfs_btree_path *path,
1558                                       int maxlevel, struct inode *dat)
1559 {
1560         int level;
1561
1562         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1563                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1564                 path[level].bp_op(btree, path, level, NULL, NULL);
1565         }
1566
1567         if (!nilfs_bmap_dirty(btree))
1568                 nilfs_bmap_set_dirty(btree);
1569 }
1570
1571 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1572
1573 {
1574         struct nilfs_btree_path *path;
1575         struct nilfs_bmap_stats stats;
1576         struct inode *dat;
1577         int level, ret;
1578
1579         path = nilfs_btree_alloc_path();
1580         if (path == NULL)
1581                 return -ENOMEM;
1582
1583         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1584                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1585         if (ret < 0)
1586                 goto out;
1587
1588
1589         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1590
1591         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1592         if (ret < 0)
1593                 goto out;
1594         nilfs_btree_commit_delete(btree, path, level, dat);
1595         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1596
1597 out:
1598         nilfs_btree_free_path(path);
1599         return ret;
1600 }
1601
1602 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1603                                 __u64 *keyp)
1604 {
1605         struct nilfs_btree_path *path;
1606         const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1607         int ret;
1608
1609         path = nilfs_btree_alloc_path();
1610         if (!path)
1611                 return -ENOMEM;
1612
1613         ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1614         if (!ret)
1615                 *keyp = start;
1616         else if (ret == -ENOENT)
1617                 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1618
1619         nilfs_btree_free_path(path);
1620         return ret;
1621 }
1622
1623 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1624 {
1625         struct nilfs_btree_path *path;
1626         int ret;
1627
1628         path = nilfs_btree_alloc_path();
1629         if (path == NULL)
1630                 return -ENOMEM;
1631
1632         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1633
1634         nilfs_btree_free_path(path);
1635
1636         return ret;
1637 }
1638
1639 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1640 {
1641         struct buffer_head *bh;
1642         struct nilfs_btree_node *root, *node;
1643         __u64 maxkey, nextmaxkey;
1644         __u64 ptr;
1645         int nchildren, ret;
1646
1647         root = nilfs_btree_get_root(btree);
1648         switch (nilfs_btree_height(btree)) {
1649         case 2:
1650                 bh = NULL;
1651                 node = root;
1652                 break;
1653         case 3:
1654                 nchildren = nilfs_btree_node_get_nchildren(root);
1655                 if (nchildren > 1)
1656                         return 0;
1657                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1658                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1659                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1660                 if (ret < 0)
1661                         return ret;
1662                 node = (struct nilfs_btree_node *)bh->b_data;
1663                 break;
1664         default:
1665                 return 0;
1666         }
1667
1668         nchildren = nilfs_btree_node_get_nchildren(node);
1669         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1670         nextmaxkey = (nchildren > 1) ?
1671                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1672         if (bh != NULL)
1673                 brelse(bh);
1674
1675         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1676 }
1677
1678 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1679                                    __u64 *keys, __u64 *ptrs, int nitems)
1680 {
1681         struct buffer_head *bh;
1682         struct nilfs_btree_node *node, *root;
1683         __le64 *dkeys;
1684         __le64 *dptrs;
1685         __u64 ptr;
1686         int nchildren, ncmax, i, ret;
1687
1688         root = nilfs_btree_get_root(btree);
1689         switch (nilfs_btree_height(btree)) {
1690         case 2:
1691                 bh = NULL;
1692                 node = root;
1693                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1694                 break;
1695         case 3:
1696                 nchildren = nilfs_btree_node_get_nchildren(root);
1697                 WARN_ON(nchildren > 1);
1698                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1699                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1700                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1701                 if (ret < 0)
1702                         return ret;
1703                 node = (struct nilfs_btree_node *)bh->b_data;
1704                 ncmax = nilfs_btree_nchildren_per_block(btree);
1705                 break;
1706         default:
1707                 node = NULL;
1708                 return -EINVAL;
1709         }
1710
1711         nchildren = nilfs_btree_node_get_nchildren(node);
1712         if (nchildren < nitems)
1713                 nitems = nchildren;
1714         dkeys = nilfs_btree_node_dkeys(node);
1715         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1716         for (i = 0; i < nitems; i++) {
1717                 keys[i] = le64_to_cpu(dkeys[i]);
1718                 ptrs[i] = le64_to_cpu(dptrs[i]);
1719         }
1720
1721         if (bh != NULL)
1722                 brelse(bh);
1723
1724         return nitems;
1725 }
1726
1727 static int
1728 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1729                                        union nilfs_bmap_ptr_req *dreq,
1730                                        union nilfs_bmap_ptr_req *nreq,
1731                                        struct buffer_head **bhp,
1732                                        struct nilfs_bmap_stats *stats)
1733 {
1734         struct buffer_head *bh;
1735         struct inode *dat = NULL;
1736         int ret;
1737
1738         stats->bs_nblocks = 0;
1739
1740         /* for data */
1741         /* cannot find near ptr */
1742         if (NILFS_BMAP_USE_VBN(btree)) {
1743                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1744                 dat = nilfs_bmap_get_dat(btree);
1745         }
1746
1747         ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1748         if (ret < 0)
1749                 return ret;
1750
1751         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1752         if (ret < 0)
1753                 return ret;
1754
1755         *bhp = NULL;
1756         stats->bs_nblocks++;
1757         if (nreq != NULL) {
1758                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1759                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1760                 if (ret < 0)
1761                         goto err_out_dreq;
1762
1763                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1764                 if (ret < 0)
1765                         goto err_out_nreq;
1766
1767                 *bhp = bh;
1768                 stats->bs_nblocks++;
1769         }
1770
1771         /* success */
1772         return 0;
1773
1774         /* error */
1775  err_out_nreq:
1776         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1777  err_out_dreq:
1778         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1779         stats->bs_nblocks = 0;
1780         return ret;
1781
1782 }
1783
1784 static void
1785 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1786                                       __u64 key, __u64 ptr,
1787                                       const __u64 *keys, const __u64 *ptrs,
1788                                       int n,
1789                                       union nilfs_bmap_ptr_req *dreq,
1790                                       union nilfs_bmap_ptr_req *nreq,
1791                                       struct buffer_head *bh)
1792 {
1793         struct nilfs_btree_node *node;
1794         struct inode *dat;
1795         __u64 tmpptr;
1796         int ncblk;
1797
1798         /* free resources */
1799         if (btree->b_ops->bop_clear != NULL)
1800                 btree->b_ops->bop_clear(btree);
1801
1802         /* ptr must be a pointer to a buffer head. */
1803         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1804
1805         /* convert and insert */
1806         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1807         __nilfs_btree_init(btree);
1808         if (nreq != NULL) {
1809                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1810                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1811
1812                 /* create child node at level 1 */
1813                 node = (struct nilfs_btree_node *)bh->b_data;
1814                 ncblk = nilfs_btree_nchildren_per_block(btree);
1815                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1816                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1817                 if (!buffer_dirty(bh))
1818                         mark_buffer_dirty(bh);
1819                 if (!nilfs_bmap_dirty(btree))
1820                         nilfs_bmap_set_dirty(btree);
1821
1822                 brelse(bh);
1823
1824                 /* create root node at level 2 */
1825                 node = nilfs_btree_get_root(btree);
1826                 tmpptr = nreq->bpr_ptr;
1827                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1828                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1829                                       &keys[0], &tmpptr);
1830         } else {
1831                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1832
1833                 /* create root node at level 1 */
1834                 node = nilfs_btree_get_root(btree);
1835                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1836                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1837                                       keys, ptrs);
1838                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1839                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1840                 if (!nilfs_bmap_dirty(btree))
1841                         nilfs_bmap_set_dirty(btree);
1842         }
1843
1844         if (NILFS_BMAP_USE_VBN(btree))
1845                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1846 }
1847
1848 /**
1849  * nilfs_btree_convert_and_insert -
1850  * @bmap:
1851  * @key:
1852  * @ptr:
1853  * @keys:
1854  * @ptrs:
1855  * @n:
1856  */
1857 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1858                                    __u64 key, __u64 ptr,
1859                                    const __u64 *keys, const __u64 *ptrs, int n)
1860 {
1861         struct buffer_head *bh = NULL;
1862         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1863         struct nilfs_bmap_stats stats;
1864         int ret;
1865
1866         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1867                 di = &dreq;
1868                 ni = NULL;
1869         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1870                            nilfs_btree_node_size(btree))) {
1871                 di = &dreq;
1872                 ni = &nreq;
1873         } else {
1874                 di = NULL;
1875                 ni = NULL;
1876                 BUG();
1877         }
1878
1879         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1880                                                      &stats);
1881         if (ret < 0)
1882                 return ret;
1883         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1884                                               di, ni, bh);
1885         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1886         return 0;
1887 }
1888
1889 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1890                                    struct nilfs_btree_path *path,
1891                                    int level,
1892                                    struct buffer_head *bh)
1893 {
1894         while ((++level < nilfs_btree_height(btree) - 1) &&
1895                !buffer_dirty(path[level].bp_bh))
1896                 mark_buffer_dirty(path[level].bp_bh);
1897
1898         return 0;
1899 }
1900
1901 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1902                                         struct nilfs_btree_path *path,
1903                                         int level, struct inode *dat)
1904 {
1905         struct nilfs_btree_node *parent;
1906         int ncmax, ret;
1907
1908         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1909         path[level].bp_oldreq.bpr_ptr =
1910                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1911                                          ncmax);
1912         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1913         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1914                                        &path[level].bp_newreq.bpr_req);
1915         if (ret < 0)
1916                 return ret;
1917
1918         if (buffer_nilfs_node(path[level].bp_bh)) {
1919                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1920                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1921                 path[level].bp_ctxt.bh = path[level].bp_bh;
1922                 ret = nilfs_btnode_prepare_change_key(
1923                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1924                         &path[level].bp_ctxt);
1925                 if (ret < 0) {
1926                         nilfs_dat_abort_update(dat,
1927                                                &path[level].bp_oldreq.bpr_req,
1928                                                &path[level].bp_newreq.bpr_req);
1929                         return ret;
1930                 }
1931         }
1932
1933         return 0;
1934 }
1935
1936 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1937                                         struct nilfs_btree_path *path,
1938                                         int level, struct inode *dat)
1939 {
1940         struct nilfs_btree_node *parent;
1941         int ncmax;
1942
1943         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1944                                 &path[level].bp_newreq.bpr_req,
1945                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1946
1947         if (buffer_nilfs_node(path[level].bp_bh)) {
1948                 nilfs_btnode_commit_change_key(
1949                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1950                         &path[level].bp_ctxt);
1951                 path[level].bp_bh = path[level].bp_ctxt.bh;
1952         }
1953         set_buffer_nilfs_volatile(path[level].bp_bh);
1954
1955         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1956         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1957                                  path[level].bp_newreq.bpr_ptr, ncmax);
1958 }
1959
1960 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1961                                        struct nilfs_btree_path *path,
1962                                        int level, struct inode *dat)
1963 {
1964         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1965                                &path[level].bp_newreq.bpr_req);
1966         if (buffer_nilfs_node(path[level].bp_bh))
1967                 nilfs_btnode_abort_change_key(
1968                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1969                         &path[level].bp_ctxt);
1970 }
1971
1972 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1973                                            struct nilfs_btree_path *path,
1974                                            int minlevel, int *maxlevelp,
1975                                            struct inode *dat)
1976 {
1977         int level, ret;
1978
1979         level = minlevel;
1980         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1981                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1982                 if (ret < 0)
1983                         return ret;
1984         }
1985         while ((++level < nilfs_btree_height(btree) - 1) &&
1986                !buffer_dirty(path[level].bp_bh)) {
1987
1988                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1989                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1990                 if (ret < 0)
1991                         goto out;
1992         }
1993
1994         /* success */
1995         *maxlevelp = level - 1;
1996         return 0;
1997
1998         /* error */
1999  out:
2000         while (--level > minlevel)
2001                 nilfs_btree_abort_update_v(btree, path, level, dat);
2002         if (!buffer_nilfs_volatile(path[level].bp_bh))
2003                 nilfs_btree_abort_update_v(btree, path, level, dat);
2004         return ret;
2005 }
2006
2007 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2008                                            struct nilfs_btree_path *path,
2009                                            int minlevel, int maxlevel,
2010                                            struct buffer_head *bh,
2011                                            struct inode *dat)
2012 {
2013         int level;
2014
2015         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2016                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2017
2018         for (level = minlevel + 1; level <= maxlevel; level++)
2019                 nilfs_btree_commit_update_v(btree, path, level, dat);
2020 }
2021
2022 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2023                                    struct nilfs_btree_path *path,
2024                                    int level, struct buffer_head *bh)
2025 {
2026         int maxlevel = 0, ret;
2027         struct nilfs_btree_node *parent;
2028         struct inode *dat = nilfs_bmap_get_dat(btree);
2029         __u64 ptr;
2030         int ncmax;
2031
2032         get_bh(bh);
2033         path[level].bp_bh = bh;
2034         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2035                                               dat);
2036         if (ret < 0)
2037                 goto out;
2038
2039         if (buffer_nilfs_volatile(path[level].bp_bh)) {
2040                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2041                 ptr = nilfs_btree_node_get_ptr(parent,
2042                                                path[level + 1].bp_index,
2043                                                ncmax);
2044                 ret = nilfs_dat_mark_dirty(dat, ptr);
2045                 if (ret < 0)
2046                         goto out;
2047         }
2048
2049         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2050
2051  out:
2052         brelse(path[level].bp_bh);
2053         path[level].bp_bh = NULL;
2054         return ret;
2055 }
2056
2057 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2058                                  struct buffer_head *bh)
2059 {
2060         struct nilfs_btree_path *path;
2061         struct nilfs_btree_node *node;
2062         __u64 key;
2063         int level, ret;
2064
2065         WARN_ON(!buffer_dirty(bh));
2066
2067         path = nilfs_btree_alloc_path();
2068         if (path == NULL)
2069                 return -ENOMEM;
2070
2071         if (buffer_nilfs_node(bh)) {
2072                 node = (struct nilfs_btree_node *)bh->b_data;
2073                 key = nilfs_btree_node_get_key(node, 0);
2074                 level = nilfs_btree_node_get_level(node);
2075         } else {
2076                 key = nilfs_bmap_data_get_key(btree, bh);
2077                 level = NILFS_BTREE_LEVEL_DATA;
2078         }
2079
2080         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2081         if (ret < 0) {
2082                 if (unlikely(ret == -ENOENT))
2083                         nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
2084                                   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2085                                   btree->b_inode->i_ino,
2086                                   (unsigned long long)key, level);
2087                 goto out;
2088         }
2089
2090         ret = NILFS_BMAP_USE_VBN(btree) ?
2091                 nilfs_btree_propagate_v(btree, path, level, bh) :
2092                 nilfs_btree_propagate_p(btree, path, level, bh);
2093
2094  out:
2095         nilfs_btree_free_path(path);
2096
2097         return ret;
2098 }
2099
2100 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2101                                     struct buffer_head *bh)
2102 {
2103         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2104 }
2105
2106 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2107                                          struct list_head *lists,
2108                                          struct buffer_head *bh)
2109 {
2110         struct list_head *head;
2111         struct buffer_head *cbh;
2112         struct nilfs_btree_node *node, *cnode;
2113         __u64 key, ckey;
2114         int level;
2115
2116         get_bh(bh);
2117         node = (struct nilfs_btree_node *)bh->b_data;
2118         key = nilfs_btree_node_get_key(node, 0);
2119         level = nilfs_btree_node_get_level(node);
2120         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2121             level >= NILFS_BTREE_LEVEL_MAX) {
2122                 dump_stack();
2123                 nilfs_msg(btree->b_inode->i_sb, KERN_WARNING,
2124                           "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2125                           level, (unsigned long long)key,
2126                           btree->b_inode->i_ino,
2127                           (unsigned long long)bh->b_blocknr);
2128                 return;
2129         }
2130
2131         list_for_each(head, &lists[level]) {
2132                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2133                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2134                 ckey = nilfs_btree_node_get_key(cnode, 0);
2135                 if (key < ckey)
2136                         break;
2137         }
2138         list_add_tail(&bh->b_assoc_buffers, head);
2139 }
2140
2141 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2142                                              struct list_head *listp)
2143 {
2144         struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2145         struct address_space *btcache = btnc_inode->i_mapping;
2146         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2147         struct pagevec pvec;
2148         struct buffer_head *bh, *head;
2149         pgoff_t index = 0;
2150         int level, i;
2151
2152         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2153              level < NILFS_BTREE_LEVEL_MAX;
2154              level++)
2155                 INIT_LIST_HEAD(&lists[level]);
2156
2157         pagevec_init(&pvec);
2158
2159         while (pagevec_lookup_tag(&pvec, btcache, &index,
2160                                         PAGECACHE_TAG_DIRTY)) {
2161                 for (i = 0; i < pagevec_count(&pvec); i++) {
2162                         bh = head = page_buffers(pvec.pages[i]);
2163                         do {
2164                                 if (buffer_dirty(bh))
2165                                         nilfs_btree_add_dirty_buffer(btree,
2166                                                                      lists, bh);
2167                         } while ((bh = bh->b_this_page) != head);
2168                 }
2169                 pagevec_release(&pvec);
2170                 cond_resched();
2171         }
2172
2173         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2174              level < NILFS_BTREE_LEVEL_MAX;
2175              level++)
2176                 list_splice_tail(&lists[level], listp);
2177 }
2178
2179 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2180                                 struct nilfs_btree_path *path,
2181                                 int level,
2182                                 struct buffer_head **bh,
2183                                 sector_t blocknr,
2184                                 union nilfs_binfo *binfo)
2185 {
2186         struct nilfs_btree_node *parent;
2187         __u64 key;
2188         __u64 ptr;
2189         int ncmax, ret;
2190
2191         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2192         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2193                                        ncmax);
2194         if (buffer_nilfs_node(*bh)) {
2195                 path[level].bp_ctxt.oldkey = ptr;
2196                 path[level].bp_ctxt.newkey = blocknr;
2197                 path[level].bp_ctxt.bh = *bh;
2198                 ret = nilfs_btnode_prepare_change_key(
2199                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2200                         &path[level].bp_ctxt);
2201                 if (ret < 0)
2202                         return ret;
2203                 nilfs_btnode_commit_change_key(
2204                         NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2205                         &path[level].bp_ctxt);
2206                 *bh = path[level].bp_ctxt.bh;
2207         }
2208
2209         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2210                                  ncmax);
2211
2212         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2213         /* on-disk format */
2214         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2215         binfo->bi_dat.bi_level = level;
2216
2217         return 0;
2218 }
2219
2220 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2221                                 struct nilfs_btree_path *path,
2222                                 int level,
2223                                 struct buffer_head **bh,
2224                                 sector_t blocknr,
2225                                 union nilfs_binfo *binfo)
2226 {
2227         struct nilfs_btree_node *parent;
2228         struct inode *dat = nilfs_bmap_get_dat(btree);
2229         __u64 key;
2230         __u64 ptr;
2231         union nilfs_bmap_ptr_req req;
2232         int ncmax, ret;
2233
2234         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2235         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2236                                        ncmax);
2237         req.bpr_ptr = ptr;
2238         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2239         if (ret < 0)
2240                 return ret;
2241         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2242
2243         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2244         /* on-disk format */
2245         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2246         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2247
2248         return 0;
2249 }
2250
2251 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2252                               struct buffer_head **bh,
2253                               sector_t blocknr,
2254                               union nilfs_binfo *binfo)
2255 {
2256         struct nilfs_btree_path *path;
2257         struct nilfs_btree_node *node;
2258         __u64 key;
2259         int level, ret;
2260
2261         path = nilfs_btree_alloc_path();
2262         if (path == NULL)
2263                 return -ENOMEM;
2264
2265         if (buffer_nilfs_node(*bh)) {
2266                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2267                 key = nilfs_btree_node_get_key(node, 0);
2268                 level = nilfs_btree_node_get_level(node);
2269         } else {
2270                 key = nilfs_bmap_data_get_key(btree, *bh);
2271                 level = NILFS_BTREE_LEVEL_DATA;
2272         }
2273
2274         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2275         if (ret < 0) {
2276                 WARN_ON(ret == -ENOENT);
2277                 goto out;
2278         }
2279
2280         ret = NILFS_BMAP_USE_VBN(btree) ?
2281                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2282                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2283
2284  out:
2285         nilfs_btree_free_path(path);
2286
2287         return ret;
2288 }
2289
2290 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2291                                  struct buffer_head **bh,
2292                                  sector_t blocknr,
2293                                  union nilfs_binfo *binfo)
2294 {
2295         struct nilfs_btree_node *node;
2296         __u64 key;
2297         int ret;
2298
2299         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2300                              blocknr);
2301         if (ret < 0)
2302                 return ret;
2303
2304         if (buffer_nilfs_node(*bh)) {
2305                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2306                 key = nilfs_btree_node_get_key(node, 0);
2307         } else
2308                 key = nilfs_bmap_data_get_key(btree, *bh);
2309
2310         /* on-disk format */
2311         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2312         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2313
2314         return 0;
2315 }
2316
2317 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2318 {
2319         struct buffer_head *bh;
2320         struct nilfs_btree_path *path;
2321         __u64 ptr;
2322         int ret;
2323
2324         path = nilfs_btree_alloc_path();
2325         if (path == NULL)
2326                 return -ENOMEM;
2327
2328         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2329         if (ret < 0) {
2330                 WARN_ON(ret == -ENOENT);
2331                 goto out;
2332         }
2333         ret = nilfs_btree_get_block(btree, ptr, &bh);
2334         if (ret < 0) {
2335                 WARN_ON(ret == -ENOENT);
2336                 goto out;
2337         }
2338
2339         if (!buffer_dirty(bh))
2340                 mark_buffer_dirty(bh);
2341         brelse(bh);
2342         if (!nilfs_bmap_dirty(btree))
2343                 nilfs_bmap_set_dirty(btree);
2344
2345  out:
2346         nilfs_btree_free_path(path);
2347         return ret;
2348 }
2349
2350 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2351         .bop_lookup             =       nilfs_btree_lookup,
2352         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2353         .bop_insert             =       nilfs_btree_insert,
2354         .bop_delete             =       nilfs_btree_delete,
2355         .bop_clear              =       NULL,
2356
2357         .bop_propagate          =       nilfs_btree_propagate,
2358
2359         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2360
2361         .bop_assign             =       nilfs_btree_assign,
2362         .bop_mark               =       nilfs_btree_mark,
2363
2364         .bop_seek_key           =       nilfs_btree_seek_key,
2365         .bop_last_key           =       nilfs_btree_last_key,
2366
2367         .bop_check_insert       =       NULL,
2368         .bop_check_delete       =       nilfs_btree_check_delete,
2369         .bop_gather_data        =       nilfs_btree_gather_data,
2370 };
2371
2372 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2373         .bop_lookup             =       NULL,
2374         .bop_lookup_contig      =       NULL,
2375         .bop_insert             =       NULL,
2376         .bop_delete             =       NULL,
2377         .bop_clear              =       NULL,
2378
2379         .bop_propagate          =       nilfs_btree_propagate_gc,
2380
2381         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2382
2383         .bop_assign             =       nilfs_btree_assign_gc,
2384         .bop_mark               =       NULL,
2385
2386         .bop_seek_key           =       NULL,
2387         .bop_last_key           =       NULL,
2388
2389         .bop_check_insert       =       NULL,
2390         .bop_check_delete       =       NULL,
2391         .bop_gather_data        =       NULL,
2392 };
2393
2394 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2395 {
2396         bmap->b_ops = &nilfs_btree_ops;
2397         bmap->b_nchildren_per_block =
2398                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2399 }
2400
2401 int nilfs_btree_init(struct nilfs_bmap *bmap)
2402 {
2403         int ret = 0;
2404
2405         __nilfs_btree_init(bmap);
2406
2407         if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2408                 ret = -EIO;
2409         else
2410                 ret = nilfs_attach_btree_node_cache(
2411                         &NILFS_BMAP_I(bmap)->vfs_inode);
2412
2413         return ret;
2414 }
2415
2416 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2417 {
2418         bmap->b_ops = &nilfs_btree_ops_gc;
2419         bmap->b_nchildren_per_block =
2420                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2421 }