xref: /linux/fs/nilfs2/btree.c (revision 6000fc4d6f3e55ad52cce8d76317187fe01af2aa)
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22 
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33 
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
 * @bp_ctxt: change key context used when a node block is re-keyed in the btnode cache
41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44 	struct buffer_head *bp_bh;
45 	struct buffer_head *bp_sib_bh;
46 	int bp_index;
47 	union nilfs_bmap_ptr_req bp_oldreq;
48 	union nilfs_bmap_ptr_req bp_newreq;
49 	struct nilfs_btnode_chkey_ctxt bp_ctxt;
50 	void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51 		      int, __u64 *, __u64 *);
52 };
53 
54 /*
55  * B-tree path operations
56  */
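/*
 * A btree "path" is an array of NILFS_BTREE_LEVEL_MAX elements indexed by
 * tree level, allocated as a single object from nilfs_btree_path_cache.
 * Level NILFS_BTREE_LEVEL_DATA describes the data block; the higher levels
 * hold the node blocks visited on the way down from the root.
 *
 * Typical usage (illustrative, mirroring nilfs_btree_lookup() below):
 *
 *	path = nilfs_btree_alloc_path(btree);
 *	nilfs_btree_init_path(btree, path);
 *	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
 *	nilfs_btree_clear_path(btree, path);
 *	nilfs_btree_free_path(btree, path);
 */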
57 
58 static struct kmem_cache *nilfs_btree_path_cache;
59 
60 int __init nilfs_btree_path_cache_init(void)
61 {
62 	nilfs_btree_path_cache =
63 		kmem_cache_create("nilfs2_btree_path_cache",
64 				  sizeof(struct nilfs_btree_path) *
65 				  NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66 	return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68 
69 void nilfs_btree_path_cache_destroy(void)
70 {
71 	kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73 
74 static inline struct nilfs_btree_path *
75 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
76 {
77 	return (struct nilfs_btree_path *)
78 		kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
79 }
80 
81 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
82 					 struct nilfs_btree_path *path)
83 {
84 	kmem_cache_free(nilfs_btree_path_cache, path);
85 }
86 
87 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
88 				  struct nilfs_btree_path *path)
89 {
90 	int level;
91 
92 	for (level = NILFS_BTREE_LEVEL_DATA;
93 	     level < NILFS_BTREE_LEVEL_MAX;
94 	     level++) {
95 		path[level].bp_bh = NULL;
96 		path[level].bp_sib_bh = NULL;
97 		path[level].bp_index = 0;
98 		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99 		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
100 		path[level].bp_op = NULL;
101 	}
102 }
103 
104 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
105 				   struct nilfs_btree_path *path)
106 {
107 	int level;
108 
109 	for (level = NILFS_BTREE_LEVEL_DATA;
110 	     level < NILFS_BTREE_LEVEL_MAX;
111 	     level++) {
112 		if (path[level].bp_bh != NULL) {
113 			brelse(path[level].bp_bh);
114 			path[level].bp_bh = NULL;
115 		}
116 		/* sib_bh is released or deleted by prepare or commit
117 		 * operations. */
118 		path[level].bp_sib_bh = NULL;
119 		path[level].bp_index = 0;
120 		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121 		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
122 		path[level].bp_op = NULL;
123 	}
124 }
125 
126 /*
127  * B-tree node operations
128  */
129 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
130 				 struct buffer_head **bhp)
131 {
132 	struct address_space *btnc =
133 		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
134 	return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
135 }
136 
137 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
138 				     __u64 ptr, struct buffer_head **bhp)
139 {
140 	struct address_space *btnc =
141 		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
142 	int ret;
143 
144 	ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
145 	if (!ret)
146 		set_buffer_nilfs_volatile(*bhp);
147 	return ret;
148 }
149 
150 static inline int
151 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
152 			   const struct nilfs_btree_node *node)
153 {
154 	return node->bn_flags;
155 }
156 
157 static inline void
158 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
159 			   struct nilfs_btree_node *node,
160 			   int flags)
161 {
162 	node->bn_flags = flags;
163 }
164 
165 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
166 					const struct nilfs_btree_node *node)
167 {
168 	return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
169 }
170 
171 static inline int
172 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
173 			   const struct nilfs_btree_node *node)
174 {
175 	return node->bn_level;
176 }
177 
178 static inline void
179 nilfs_btree_node_set_level(struct nilfs_btree *btree,
180 			   struct nilfs_btree_node *node,
181 			   int level)
182 {
183 	node->bn_level = level;
184 }
185 
186 static inline int
187 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
188 			       const struct nilfs_btree_node *node)
189 {
190 	return le16_to_cpu(node->bn_nchildren);
191 }
192 
193 static inline void
194 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
195 			       struct nilfs_btree_node *node,
196 			       int nchildren)
197 {
198 	node->bn_nchildren = cpu_to_le16(nchildren);
199 }
200 
201 static inline int
202 nilfs_btree_node_size(const struct nilfs_btree *btree)
203 {
204 	return 1 << btree->bt_bmap.b_inode->i_blkbits;
205 }
206 
207 static inline int
208 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
209 			       const struct nilfs_btree_node *node)
210 {
211 	return nilfs_btree_node_root(btree, node) ?
212 		NILFS_BTREE_ROOT_NCHILDREN_MIN :
213 		NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
214 }
215 
216 static inline int
217 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
218 			       const struct nilfs_btree_node *node)
219 {
220 	return nilfs_btree_node_root(btree, node) ?
221 		NILFS_BTREE_ROOT_NCHILDREN_MAX :
222 		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
223 }
224 
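/*
 * Node layout, as implied by the accessors below:
 *
 *	header (struct nilfs_btree_node) | pad (non-root only) | keys[] | ptrs[]
 *
 * Both arrays are dimensioned by nilfs_btree_node_nchildren_max().  The
 * root node is embedded in the bmap itself (see nilfs_btree_get_root())
 * and therefore uses the smaller NILFS_BTREE_ROOT_NCHILDREN_MAX bound and
 * carries no extra pad.
 */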
225 static inline __le64 *
226 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
227 		       const struct nilfs_btree_node *node)
228 {
229 	return (__le64 *)((char *)(node + 1) +
230 			  (nilfs_btree_node_root(btree, node) ?
231 			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
232 }
233 
234 static inline __le64 *
235 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
236 		       const struct nilfs_btree_node *node)
237 {
238 	return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
239 			  nilfs_btree_node_nchildren_max(btree, node));
240 }
241 
242 static inline __u64
243 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
244 			 const struct nilfs_btree_node *node, int index)
245 {
246 	return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
247 					index));
248 }
249 
250 static inline void
251 nilfs_btree_node_set_key(struct nilfs_btree *btree,
252 			 struct nilfs_btree_node *node, int index, __u64 key)
253 {
254 	*(nilfs_btree_node_dkeys(btree, node) + index) =
255 		nilfs_bmap_key_to_dkey(key);
256 }
257 
258 static inline __u64
259 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
260 			 const struct nilfs_btree_node *node,
261 			 int index)
262 {
263 	return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
264 					index));
265 }
266 
267 static inline void
268 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
269 			 struct nilfs_btree_node *node,
270 			 int index,
271 			 __u64 ptr)
272 {
273 	*(nilfs_btree_node_dptrs(btree, node) + index) =
274 		nilfs_bmap_ptr_to_dptr(ptr);
275 }
276 
277 static void nilfs_btree_node_init(struct nilfs_btree *btree,
278 				  struct nilfs_btree_node *node,
279 				  int flags, int level, int nchildren,
280 				  const __u64 *keys, const __u64 *ptrs)
281 {
282 	__le64 *dkeys;
283 	__le64 *dptrs;
284 	int i;
285 
286 	nilfs_btree_node_set_flags(btree, node, flags);
287 	nilfs_btree_node_set_level(btree, node, level);
288 	nilfs_btree_node_set_nchildren(btree, node, nchildren);
289 
290 	dkeys = nilfs_btree_node_dkeys(btree, node);
291 	dptrs = nilfs_btree_node_dptrs(btree, node);
292 	for (i = 0; i < nchildren; i++) {
293 		dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
294 		dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
295 	}
296 }
297 
298 /* Assume the buffer heads corresponding to left and right are locked. */
299 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
300 				       struct nilfs_btree_node *left,
301 				       struct nilfs_btree_node *right,
302 				       int n)
303 {
304 	__le64 *ldkeys, *rdkeys;
305 	__le64 *ldptrs, *rdptrs;
306 	int lnchildren, rnchildren;
307 
308 	ldkeys = nilfs_btree_node_dkeys(btree, left);
309 	ldptrs = nilfs_btree_node_dptrs(btree, left);
310 	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
311 
312 	rdkeys = nilfs_btree_node_dkeys(btree, right);
313 	rdptrs = nilfs_btree_node_dptrs(btree, right);
314 	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
315 
316 	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
317 	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
318 	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
319 	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
320 
321 	lnchildren += n;
322 	rnchildren -= n;
323 	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
324 	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
325 }
326 
327 /* Assume that the buffer heads corresponding to left and right are locked. */
328 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
329 					struct nilfs_btree_node *left,
330 					struct nilfs_btree_node *right,
331 					int n)
332 {
333 	__le64 *ldkeys, *rdkeys;
334 	__le64 *ldptrs, *rdptrs;
335 	int lnchildren, rnchildren;
336 
337 	ldkeys = nilfs_btree_node_dkeys(btree, left);
338 	ldptrs = nilfs_btree_node_dptrs(btree, left);
339 	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
340 
341 	rdkeys = nilfs_btree_node_dkeys(btree, right);
342 	rdptrs = nilfs_btree_node_dptrs(btree, right);
343 	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
344 
345 	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
346 	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
347 	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
348 	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
349 
350 	lnchildren -= n;
351 	rnchildren += n;
352 	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
353 	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
354 }
355 
356 /* Assume that the buffer head corresponding to node is locked. */
357 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
358 				    struct nilfs_btree_node *node,
359 				    __u64 key, __u64 ptr, int index)
360 {
361 	__le64 *dkeys;
362 	__le64 *dptrs;
363 	int nchildren;
364 
365 	dkeys = nilfs_btree_node_dkeys(btree, node);
366 	dptrs = nilfs_btree_node_dptrs(btree, node);
367 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
368 	if (index < nchildren) {
369 		memmove(dkeys + index + 1, dkeys + index,
370 			(nchildren - index) * sizeof(*dkeys));
371 		memmove(dptrs + index + 1, dptrs + index,
372 			(nchildren - index) * sizeof(*dptrs));
373 	}
374 	dkeys[index] = nilfs_bmap_key_to_dkey(key);
375 	dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
376 	nchildren++;
377 	nilfs_btree_node_set_nchildren(btree, node, nchildren);
378 }
379 
380 /* Assume that the buffer head corresponding to node is locked. */
381 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
382 				    struct nilfs_btree_node *node,
383 				    __u64 *keyp, __u64 *ptrp, int index)
384 {
385 	__u64 key;
386 	__u64 ptr;
387 	__le64 *dkeys;
388 	__le64 *dptrs;
389 	int nchildren;
390 
391 	dkeys = nilfs_btree_node_dkeys(btree, node);
392 	dptrs = nilfs_btree_node_dptrs(btree, node);
393 	key = nilfs_bmap_dkey_to_key(dkeys[index]);
394 	ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
395 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
396 	if (keyp != NULL)
397 		*keyp = key;
398 	if (ptrp != NULL)
399 		*ptrp = ptr;
400 
401 	if (index < nchildren - 1) {
402 		memmove(dkeys + index, dkeys + index + 1,
403 			(nchildren - index - 1) * sizeof(*dkeys));
404 		memmove(dptrs + index, dptrs + index + 1,
405 			(nchildren - index - 1) * sizeof(*dptrs));
406 	}
407 	nchildren--;
408 	nilfs_btree_node_set_nchildren(btree, node, nchildren);
409 }
410 
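/*
 * Binary search within a node.  Returns nonzero iff @key was found.  On
 * return, *@indexp holds the slot of the matching entry or, when the key
 * is absent, the insertion position for bottom-level nodes and the slot
 * of the child whose subtree covers @key for higher-level nodes.
 */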
411 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
412 				   const struct nilfs_btree_node *node,
413 				   __u64 key, int *indexp)
414 {
415 	__u64 nkey;
416 	int index, low, high, s;
417 
418 	/* binary search */
419 	low = 0;
420 	high = nilfs_btree_node_get_nchildren(btree, node) - 1;
421 	index = 0;
422 	s = 0;
423 	while (low <= high) {
424 		index = (low + high) / 2;
425 		nkey = nilfs_btree_node_get_key(btree, node, index);
426 		if (nkey == key) {
427 			s = 0;
428 			goto out;
429 		} else if (nkey < key) {
430 			low = index + 1;
431 			s = -1;
432 		} else {
433 			high = index - 1;
434 			s = 1;
435 		}
436 	}
437 
438 	/* adjust index */
439 	if (nilfs_btree_node_get_level(btree, node) >
440 	    NILFS_BTREE_LEVEL_NODE_MIN) {
441 		if ((s > 0) && (index > 0))
442 			index--;
443 	} else if (s < 0)
444 		index++;
445 
446  out:
447 	*indexp = index;
448 
449 	return s == 0;
450 }
451 
452 static inline struct nilfs_btree_node *
453 nilfs_btree_get_root(const struct nilfs_btree *btree)
454 {
455 	return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
456 }
457 
458 static inline struct nilfs_btree_node *
459 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
460 			     const struct nilfs_btree_path *path,
461 			     int level)
462 {
463 	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
464 }
465 
466 static inline struct nilfs_btree_node *
467 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
468 			 const struct nilfs_btree_path *path,
469 			 int level)
470 {
471 	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
472 }
473 
474 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
475 {
476 	return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
477 		+ 1;
478 }
479 
480 static inline struct nilfs_btree_node *
481 nilfs_btree_get_node(const struct nilfs_btree *btree,
482 		     const struct nilfs_btree_path *path,
483 		     int level)
484 {
485 	return (level == nilfs_btree_height(btree) - 1) ?
486 		nilfs_btree_get_root(btree) :
487 		nilfs_btree_get_nonroot_node(btree, path, level);
488 }
489 
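/*
 * Walk from the root down to @minlevel, recording the buffer head and
 * child index of every visited level in @path.  Returns -ENOENT when
 * @key is absent; the recorded indices then describe where the key would
 * be inserted, which the insertion path below relies on.
 */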
490 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
491 				 struct nilfs_btree_path *path,
492 				 __u64 key, __u64 *ptrp, int minlevel)
493 {
494 	struct nilfs_btree_node *node;
495 	__u64 ptr;
496 	int level, index, found, ret;
497 
498 	node = nilfs_btree_get_root(btree);
499 	level = nilfs_btree_node_get_level(btree, node);
500 	if ((level < minlevel) ||
501 	    (nilfs_btree_node_get_nchildren(btree, node) <= 0))
502 		return -ENOENT;
503 
504 	found = nilfs_btree_node_lookup(btree, node, key, &index);
505 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
506 	path[level].bp_bh = NULL;
507 	path[level].bp_index = index;
508 
509 	for (level--; level >= minlevel; level--) {
510 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
511 		if (ret < 0)
512 			return ret;
513 		node = nilfs_btree_get_nonroot_node(btree, path, level);
514 		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
515 		if (!found)
516 			found = nilfs_btree_node_lookup(btree, node, key,
517 							&index);
518 		else
519 			index = 0;
520 		if (index < nilfs_btree_node_nchildren_max(btree, node))
521 			ptr = nilfs_btree_node_get_ptr(btree, node, index);
522 		else {
523 			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
524 			/* insert */
525 			ptr = NILFS_BMAP_INVALID_PTR;
526 		}
527 		path[level].bp_index = index;
528 	}
529 	if (!found)
530 		return -ENOENT;
531 
532 	if (ptrp != NULL)
533 		*ptrp = ptr;
534 
535 	return 0;
536 }
537 
538 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
539 				      struct nilfs_btree_path *path,
540 				      __u64 *keyp, __u64 *ptrp)
541 {
542 	struct nilfs_btree_node *node;
543 	__u64 ptr;
544 	int index, level, ret;
545 
546 	node = nilfs_btree_get_root(btree);
547 	index = nilfs_btree_node_get_nchildren(btree, node) - 1;
548 	if (index < 0)
549 		return -ENOENT;
550 	level = nilfs_btree_node_get_level(btree, node);
551 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
552 	path[level].bp_bh = NULL;
553 	path[level].bp_index = index;
554 
555 	for (level--; level > 0; level--) {
556 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
557 		if (ret < 0)
558 			return ret;
559 		node = nilfs_btree_get_nonroot_node(btree, path, level);
560 		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
561 		index = nilfs_btree_node_get_nchildren(btree, node) - 1;
562 		ptr = nilfs_btree_node_get_ptr(btree, node, index);
563 		path[level].bp_index = index;
564 	}
565 
566 	if (keyp != NULL)
567 		*keyp = nilfs_btree_node_get_key(btree, node, index);
568 	if (ptrp != NULL)
569 		*ptrp = ptr;
570 
571 	return 0;
572 }
573 
574 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
575 			      __u64 key, int level, __u64 *ptrp)
576 {
577 	struct nilfs_btree *btree;
578 	struct nilfs_btree_path *path;
579 	__u64 ptr;
580 	int ret;
581 
582 	btree = (struct nilfs_btree *)bmap;
583 	path = nilfs_btree_alloc_path(btree);
584 	if (path == NULL)
585 		return -ENOMEM;
586 	nilfs_btree_init_path(btree, path);
587 
588 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
589 
590 	if (ptrp != NULL)
591 		*ptrp = ptr;
592 
593 	nilfs_btree_clear_path(btree, path);
594 	nilfs_btree_free_path(btree, path);
595 
596 	return ret;
597 }
598 
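/*
 * Look up @key, then count how many of the following keys map to
 * consecutive disk blocks (translating through the DAT when virtual
 * block numbers are used), walking into right sibling nodes as needed.
 * Returns the number of contiguous blocks found, at most @maxblocks,
 * with *@ptrp set to the first block, or a negative error code.
 */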
599 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
600 				     __u64 key, __u64 *ptrp, unsigned maxblocks)
601 {
602 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
603 	struct nilfs_btree_path *path;
604 	struct nilfs_btree_node *node;
605 	struct inode *dat = NULL;
606 	__u64 ptr, ptr2;
607 	sector_t blocknr;
608 	int level = NILFS_BTREE_LEVEL_NODE_MIN;
609 	int ret, cnt, index, maxlevel;
610 
611 	path = nilfs_btree_alloc_path(btree);
612 	if (path == NULL)
613 		return -ENOMEM;
614 	nilfs_btree_init_path(btree, path);
615 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
616 	if (ret < 0)
617 		goto out;
618 
619 	if (NILFS_BMAP_USE_VBN(bmap)) {
620 		dat = nilfs_bmap_get_dat(bmap);
621 		ret = nilfs_dat_translate(dat, ptr, &blocknr);
622 		if (ret < 0)
623 			goto out;
624 		ptr = blocknr;
625 	}
626 	cnt = 1;
627 	if (cnt == maxblocks)
628 		goto end;
629 
630 	maxlevel = nilfs_btree_height(btree) - 1;
631 	node = nilfs_btree_get_node(btree, path, level);
632 	index = path[level].bp_index + 1;
633 	for (;;) {
634 		while (index < nilfs_btree_node_get_nchildren(btree, node)) {
635 			if (nilfs_btree_node_get_key(btree, node, index) !=
636 			    key + cnt)
637 				goto end;
638 			ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
639 			if (dat) {
640 				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
641 				if (ret < 0)
642 					goto out;
643 				ptr2 = blocknr;
644 			}
645 			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
646 				goto end;
647 			index++;
648 			continue;
649 		}
650 		if (level == maxlevel)
651 			break;
652 
653 		/* look-up right sibling node */
654 		node = nilfs_btree_get_node(btree, path, level + 1);
655 		index = path[level + 1].bp_index + 1;
656 		if (index >= nilfs_btree_node_get_nchildren(btree, node) ||
657 		    nilfs_btree_node_get_key(btree, node, index) != key + cnt)
658 			break;
659 		ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
660 		path[level + 1].bp_index = index;
661 
662 		brelse(path[level].bp_bh);
663 		path[level].bp_bh = NULL;
664 		ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
665 		if (ret < 0)
666 			goto out;
667 		node = nilfs_btree_get_nonroot_node(btree, path, level);
668 		index = 0;
669 		path[level].bp_index = index;
670 	}
671  end:
672 	*ptrp = ptr;
673 	ret = cnt;
674  out:
675 	nilfs_btree_clear_path(btree, path);
676 	nilfs_btree_free_path(btree, path);
677 	return ret;
678 }
679 
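/*
 * Propagate a new minimum key upward: starting at @level (the parent of
 * the node whose first entry changed), rewrite the index entry at
 * bp_index and keep going up while that entry is itself slot 0, possibly
 * all the way to the root.
 */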
680 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
681 				    struct nilfs_btree_path *path,
682 				    int level, __u64 key)
683 {
684 	if (level < nilfs_btree_height(btree) - 1) {
685 		do {
686 			lock_buffer(path[level].bp_bh);
687 			nilfs_btree_node_set_key(
688 				btree,
689 				nilfs_btree_get_nonroot_node(
690 					btree, path, level),
691 				path[level].bp_index, key);
692 			if (!buffer_dirty(path[level].bp_bh))
693 				nilfs_btnode_mark_dirty(path[level].bp_bh);
694 			unlock_buffer(path[level].bp_bh);
695 		} while ((path[level].bp_index == 0) &&
696 			 (++level < nilfs_btree_height(btree) - 1));
697 	}
698 
699 	/* root */
700 	if (level == nilfs_btree_height(btree) - 1) {
701 		nilfs_btree_node_set_key(btree,
702 					 nilfs_btree_get_root(btree),
703 					 path[level].bp_index, key);
704 	}
705 }
706 
707 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
708 				  struct nilfs_btree_path *path,
709 				  int level, __u64 *keyp, __u64 *ptrp)
710 {
711 	struct nilfs_btree_node *node;
712 
713 	if (level < nilfs_btree_height(btree) - 1) {
714 		lock_buffer(path[level].bp_bh);
715 		node = nilfs_btree_get_nonroot_node(btree, path, level);
716 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
717 					path[level].bp_index);
718 		if (!buffer_dirty(path[level].bp_bh))
719 			nilfs_btnode_mark_dirty(path[level].bp_bh);
720 		unlock_buffer(path[level].bp_bh);
721 
722 		if (path[level].bp_index == 0)
723 			nilfs_btree_promote_key(btree, path, level + 1,
724 						nilfs_btree_node_get_key(
725 							btree, node, 0));
726 	} else {
727 		node = nilfs_btree_get_root(btree);
728 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
729 					path[level].bp_index);
730 	}
731 }
732 
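/*
 * Rebalance operations used on insertion, selected by
 * nilfs_btree_prepare_insert() and invoked through bp_op: carry_left and
 * carry_right shift entries into a sibling that still has room, split
 * moves the upper part into a freshly allocated right sibling, and grow
 * adds a level by moving every child of the root into a new node below it.
 */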
733 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
734 				   struct nilfs_btree_path *path,
735 				   int level, __u64 *keyp, __u64 *ptrp)
736 {
737 	struct nilfs_btree_node *node, *left;
738 	int nchildren, lnchildren, n, move;
739 
740 	lock_buffer(path[level].bp_bh);
741 	lock_buffer(path[level].bp_sib_bh);
742 
743 	node = nilfs_btree_get_nonroot_node(btree, path, level);
744 	left = nilfs_btree_get_sib_node(btree, path, level);
745 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
746 	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
747 	move = 0;
748 
749 	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
750 	if (n > path[level].bp_index) {
751 		/* move insert point */
752 		n--;
753 		move = 1;
754 	}
755 
756 	nilfs_btree_node_move_left(btree, left, node, n);
757 
758 	if (!buffer_dirty(path[level].bp_bh))
759 		nilfs_btnode_mark_dirty(path[level].bp_bh);
760 	if (!buffer_dirty(path[level].bp_sib_bh))
761 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
762 
763 	unlock_buffer(path[level].bp_bh);
764 	unlock_buffer(path[level].bp_sib_bh);
765 
766 	nilfs_btree_promote_key(btree, path, level + 1,
767 				nilfs_btree_node_get_key(btree, node, 0));
768 
769 	if (move) {
770 		brelse(path[level].bp_bh);
771 		path[level].bp_bh = path[level].bp_sib_bh;
772 		path[level].bp_sib_bh = NULL;
773 		path[level].bp_index += lnchildren;
774 		path[level + 1].bp_index--;
775 	} else {
776 		brelse(path[level].bp_sib_bh);
777 		path[level].bp_sib_bh = NULL;
778 		path[level].bp_index -= n;
779 	}
780 
781 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
782 }
783 
784 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
785 				    struct nilfs_btree_path *path,
786 				    int level, __u64 *keyp, __u64 *ptrp)
787 {
788 	struct nilfs_btree_node *node, *right;
789 	int nchildren, rnchildren, n, move;
790 
791 	lock_buffer(path[level].bp_bh);
792 	lock_buffer(path[level].bp_sib_bh);
793 
794 	node = nilfs_btree_get_nonroot_node(btree, path, level);
795 	right = nilfs_btree_get_sib_node(btree, path, level);
796 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
797 	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
798 	move = 0;
799 
800 	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
801 	if (n > nchildren - path[level].bp_index) {
802 		/* move insert point */
803 		n--;
804 		move = 1;
805 	}
806 
807 	nilfs_btree_node_move_right(btree, node, right, n);
808 
809 	if (!buffer_dirty(path[level].bp_bh))
810 		nilfs_btnode_mark_dirty(path[level].bp_bh);
811 	if (!buffer_dirty(path[level].bp_sib_bh))
812 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
813 
814 	unlock_buffer(path[level].bp_bh);
815 	unlock_buffer(path[level].bp_sib_bh);
816 
817 	path[level + 1].bp_index++;
818 	nilfs_btree_promote_key(btree, path, level + 1,
819 				nilfs_btree_node_get_key(btree, right, 0));
820 	path[level + 1].bp_index--;
821 
822 	if (move) {
823 		brelse(path[level].bp_bh);
824 		path[level].bp_bh = path[level].bp_sib_bh;
825 		path[level].bp_sib_bh = NULL;
826 		path[level].bp_index -=
827 			nilfs_btree_node_get_nchildren(btree, node);
828 		path[level + 1].bp_index++;
829 	} else {
830 		brelse(path[level].bp_sib_bh);
831 		path[level].bp_sib_bh = NULL;
832 	}
833 
834 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
835 }
836 
837 static void nilfs_btree_split(struct nilfs_btree *btree,
838 			      struct nilfs_btree_path *path,
839 			      int level, __u64 *keyp, __u64 *ptrp)
840 {
841 	struct nilfs_btree_node *node, *right;
842 	__u64 newkey;
843 	__u64 newptr;
844 	int nchildren, n, move;
845 
846 	lock_buffer(path[level].bp_bh);
847 	lock_buffer(path[level].bp_sib_bh);
848 
849 	node = nilfs_btree_get_nonroot_node(btree, path, level);
850 	right = nilfs_btree_get_sib_node(btree, path, level);
851 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
852 	move = 0;
853 
854 	n = (nchildren + 1) / 2;
855 	if (n > nchildren - path[level].bp_index) {
856 		n--;
857 		move = 1;
858 	}
859 
860 	nilfs_btree_node_move_right(btree, node, right, n);
861 
862 	if (!buffer_dirty(path[level].bp_bh))
863 		nilfs_btnode_mark_dirty(path[level].bp_bh);
864 	if (!buffer_dirty(path[level].bp_sib_bh))
865 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
866 
867 	unlock_buffer(path[level].bp_bh);
868 	unlock_buffer(path[level].bp_sib_bh);
869 
870 	newkey = nilfs_btree_node_get_key(btree, right, 0);
871 	newptr = path[level].bp_newreq.bpr_ptr;
872 
873 	if (move) {
874 		path[level].bp_index -=
875 			nilfs_btree_node_get_nchildren(btree, node);
876 		nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
877 					path[level].bp_index);
878 
879 		*keyp = nilfs_btree_node_get_key(btree, right, 0);
880 		*ptrp = path[level].bp_newreq.bpr_ptr;
881 
882 		brelse(path[level].bp_bh);
883 		path[level].bp_bh = path[level].bp_sib_bh;
884 		path[level].bp_sib_bh = NULL;
885 	} else {
886 		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
887 
888 		*keyp = nilfs_btree_node_get_key(btree, right, 0);
889 		*ptrp = path[level].bp_newreq.bpr_ptr;
890 
891 		brelse(path[level].bp_sib_bh);
892 		path[level].bp_sib_bh = NULL;
893 	}
894 
895 	path[level + 1].bp_index++;
896 }
897 
898 static void nilfs_btree_grow(struct nilfs_btree *btree,
899 			     struct nilfs_btree_path *path,
900 			     int level, __u64 *keyp, __u64 *ptrp)
901 {
902 	struct nilfs_btree_node *root, *child;
903 	int n;
904 
905 	lock_buffer(path[level].bp_sib_bh);
906 
907 	root = nilfs_btree_get_root(btree);
908 	child = nilfs_btree_get_sib_node(btree, path, level);
909 
910 	n = nilfs_btree_node_get_nchildren(btree, root);
911 
912 	nilfs_btree_node_move_right(btree, root, child, n);
913 	nilfs_btree_node_set_level(btree, root, level + 1);
914 
915 	if (!buffer_dirty(path[level].bp_sib_bh))
916 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
917 
918 	unlock_buffer(path[level].bp_sib_bh);
919 
920 	path[level].bp_bh = path[level].bp_sib_bh;
921 	path[level].bp_sib_bh = NULL;
922 
923 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
924 
925 	*keyp = nilfs_btree_node_get_key(btree, child, 0);
926 	*ptrp = path[level].bp_newreq.bpr_ptr;
927 }
928 
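/*
 * Allocation-target heuristics for virtual block numbers: prefer
 * continuing a sequential allocation, then a block near the left sibling
 * or the parent entry recorded in @path, and finally the default target
 * of the block group.
 */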
929 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
930 				   const struct nilfs_btree_path *path)
931 {
932 	struct nilfs_btree_node *node;
933 	int level;
934 
935 	if (path == NULL)
936 		return NILFS_BMAP_INVALID_PTR;
937 
938 	/* left sibling */
939 	level = NILFS_BTREE_LEVEL_NODE_MIN;
940 	if (path[level].bp_index > 0) {
941 		node = nilfs_btree_get_node(btree, path, level);
942 		return nilfs_btree_node_get_ptr(btree, node,
943 						path[level].bp_index - 1);
944 	}
945 
946 	/* parent */
947 	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
948 	if (level <= nilfs_btree_height(btree) - 1) {
949 		node = nilfs_btree_get_node(btree, path, level);
950 		return nilfs_btree_node_get_ptr(btree, node,
951 						path[level].bp_index);
952 	}
953 
954 	return NILFS_BMAP_INVALID_PTR;
955 }
956 
957 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
958 				       const struct nilfs_btree_path *path,
959 				       __u64 key)
960 {
961 	__u64 ptr;
962 
963 	ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
964 	if (ptr != NILFS_BMAP_INVALID_PTR)
965 		/* sequential access */
966 		return ptr;
967 	else {
968 		ptr = nilfs_btree_find_near(btree, path);
969 		if (ptr != NILFS_BMAP_INVALID_PTR)
970 			/* near */
971 			return ptr;
972 	}
973 	/* block group */
974 	return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
975 }
976 
977 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
978 				     __u64 ptr)
979 {
980 	btree->bt_bmap.b_last_allocated_key = key;
981 	btree->bt_bmap.b_last_allocated_ptr = ptr;
982 }
983 
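/*
 * Insertion is split into a prepare phase and a commit phase.  Prepare
 * allocates the new pointers bottom-up, picks a bp_op for each level that
 * has to be rebalanced and counts the blocks being added in @stats; on
 * failure it unwinds every allocation it made.  The commit phase
 * (nilfs_btree_commit_insert()) then applies the recorded operations.
 */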
984 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
985 				      struct nilfs_btree_path *path,
986 				      int *levelp, __u64 key, __u64 ptr,
987 				      struct nilfs_bmap_stats *stats)
988 {
989 	struct buffer_head *bh;
990 	struct nilfs_btree_node *node, *parent, *sib;
991 	__u64 sibptr;
992 	int pindex, level, ret;
993 
994 	stats->bs_nblocks = 0;
995 	level = NILFS_BTREE_LEVEL_DATA;
996 
997 	/* allocate a new ptr for data block */
998 	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
999 		path[level].bp_newreq.bpr_ptr =
1000 			nilfs_btree_find_target_v(btree, path, key);
1001 
1002 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1003 					   &path[level].bp_newreq);
1004 	if (ret < 0)
1005 		goto err_out_data;
1006 
1007 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1008 	     level < nilfs_btree_height(btree) - 1;
1009 	     level++) {
1010 		node = nilfs_btree_get_nonroot_node(btree, path, level);
1011 		if (nilfs_btree_node_get_nchildren(btree, node) <
1012 		    nilfs_btree_node_nchildren_max(btree, node)) {
1013 			path[level].bp_op = nilfs_btree_do_insert;
1014 			stats->bs_nblocks++;
1015 			goto out;
1016 		}
1017 
1018 		parent = nilfs_btree_get_node(btree, path, level + 1);
1019 		pindex = path[level + 1].bp_index;
1020 
1021 		/* left sibling */
1022 		if (pindex > 0) {
1023 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1024 							  pindex - 1);
1025 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1026 			if (ret < 0)
1027 				goto err_out_child_node;
1028 			sib = (struct nilfs_btree_node *)bh->b_data;
1029 			if (nilfs_btree_node_get_nchildren(btree, sib) <
1030 			    nilfs_btree_node_nchildren_max(btree, sib)) {
1031 				path[level].bp_sib_bh = bh;
1032 				path[level].bp_op = nilfs_btree_carry_left;
1033 				stats->bs_nblocks++;
1034 				goto out;
1035 			} else
1036 				brelse(bh);
1037 		}
1038 
1039 		/* right sibling */
1040 		if (pindex <
1041 		    nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1042 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1043 							  pindex + 1);
1044 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1045 			if (ret < 0)
1046 				goto err_out_child_node;
1047 			sib = (struct nilfs_btree_node *)bh->b_data;
1048 			if (nilfs_btree_node_get_nchildren(btree, sib) <
1049 			    nilfs_btree_node_nchildren_max(btree, sib)) {
1050 				path[level].bp_sib_bh = bh;
1051 				path[level].bp_op = nilfs_btree_carry_right;
1052 				stats->bs_nblocks++;
1053 				goto out;
1054 			} else
1055 				brelse(bh);
1056 		}
1057 
1058 		/* split */
1059 		path[level].bp_newreq.bpr_ptr =
1060 			path[level - 1].bp_newreq.bpr_ptr + 1;
1061 		ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1062 						   &path[level].bp_newreq);
1063 		if (ret < 0)
1064 			goto err_out_child_node;
1065 		ret = nilfs_btree_get_new_block(btree,
1066 						path[level].bp_newreq.bpr_ptr,
1067 						&bh);
1068 		if (ret < 0)
1069 			goto err_out_curr_node;
1070 
1071 		stats->bs_nblocks++;
1072 
1073 		lock_buffer(bh);
1074 		nilfs_btree_node_init(btree,
1075 				      (struct nilfs_btree_node *)bh->b_data,
1076 				      0, level, 0, NULL, NULL);
1077 		unlock_buffer(bh);
1078 		path[level].bp_sib_bh = bh;
1079 		path[level].bp_op = nilfs_btree_split;
1080 	}
1081 
1082 	/* root */
1083 	node = nilfs_btree_get_root(btree);
1084 	if (nilfs_btree_node_get_nchildren(btree, node) <
1085 	    nilfs_btree_node_nchildren_max(btree, node)) {
1086 		path[level].bp_op = nilfs_btree_do_insert;
1087 		stats->bs_nblocks++;
1088 		goto out;
1089 	}
1090 
1091 	/* grow */
1092 	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1093 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1094 					   &path[level].bp_newreq);
1095 	if (ret < 0)
1096 		goto err_out_child_node;
1097 	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1098 					&bh);
1099 	if (ret < 0)
1100 		goto err_out_curr_node;
1101 
1102 	lock_buffer(bh);
1103 	nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1104 			      0, level, 0, NULL, NULL);
1105 	unlock_buffer(bh);
1106 	path[level].bp_sib_bh = bh;
1107 	path[level].bp_op = nilfs_btree_grow;
1108 
1109 	level++;
1110 	path[level].bp_op = nilfs_btree_do_insert;
1111 
1112 	/* a newly-created node block and a data block are added */
1113 	stats->bs_nblocks += 2;
1114 
1115 	/* success */
1116  out:
1117 	*levelp = level;
1118 	return ret;
1119 
1120 	/* error */
1121  err_out_curr_node:
1122 	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1123  err_out_child_node:
1124 	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1125 		nilfs_btnode_delete(path[level].bp_sib_bh);
1126 		nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1127 					   &path[level].bp_newreq);
1129 	}
1130 
1131 	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
1132  err_out_data:
1133 	*levelp = level;
1134 	stats->bs_nblocks = 0;
1135 	return ret;
1136 }
1137 
1138 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1139 				      struct nilfs_btree_path *path,
1140 				      int maxlevel, __u64 key, __u64 ptr)
1141 {
1142 	int level;
1143 
1144 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1145 	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1146 	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
1147 		nilfs_btree_set_target_v(btree, key, ptr);
1148 
1149 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1150 		nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1151 					    &path[level - 1].bp_newreq);
1152 		path[level].bp_op(btree, path, level, &key, &ptr);
1153 	}
1154 
1155 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1156 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1157 }
1158 
1159 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1160 {
1161 	struct nilfs_btree *btree;
1162 	struct nilfs_btree_path *path;
1163 	struct nilfs_bmap_stats stats;
1164 	int level, ret;
1165 
1166 	btree = (struct nilfs_btree *)bmap;
1167 	path = nilfs_btree_alloc_path(btree);
1168 	if (path == NULL)
1169 		return -ENOMEM;
1170 	nilfs_btree_init_path(btree, path);
1171 
1172 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1173 				    NILFS_BTREE_LEVEL_NODE_MIN);
1174 	if (ret != -ENOENT) {
1175 		if (ret == 0)
1176 			ret = -EEXIST;
1177 		goto out;
1178 	}
1179 
1180 	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1181 	if (ret < 0)
1182 		goto out;
1183 	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1184 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1185 
1186  out:
1187 	nilfs_btree_clear_path(btree, path);
1188 	nilfs_btree_free_path(btree, path);
1189 	return ret;
1190 }
1191 
1192 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1193 				  struct nilfs_btree_path *path,
1194 				  int level, __u64 *keyp, __u64 *ptrp)
1195 {
1196 	struct nilfs_btree_node *node;
1197 
1198 	if (level < nilfs_btree_height(btree) - 1) {
1199 		lock_buffer(path[level].bp_bh);
1200 		node = nilfs_btree_get_nonroot_node(btree, path, level);
1201 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1202 					path[level].bp_index);
1203 		if (!buffer_dirty(path[level].bp_bh))
1204 			nilfs_btnode_mark_dirty(path[level].bp_bh);
1205 		unlock_buffer(path[level].bp_bh);
1206 		if (path[level].bp_index == 0)
1207 			nilfs_btree_promote_key(btree, path, level + 1,
1208 				nilfs_btree_node_get_key(btree, node, 0));
1209 	} else {
1210 		node = nilfs_btree_get_root(btree);
1211 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1212 					path[level].bp_index);
1213 	}
1214 }
1215 
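/*
 * Rebalance operations used on deletion, selected by
 * nilfs_btree_prepare_delete(): borrow_left and borrow_right pull entries
 * from a sibling that has more than the minimum, concat_left and
 * concat_right merge the node into a sibling and delete its block, and
 * shrink collapses a root whose only child has become small enough,
 * reducing the tree height by one.
 */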
1216 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1217 				    struct nilfs_btree_path *path,
1218 				    int level, __u64 *keyp, __u64 *ptrp)
1219 {
1220 	struct nilfs_btree_node *node, *left;
1221 	int nchildren, lnchildren, n;
1222 
1223 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1224 
1225 	lock_buffer(path[level].bp_bh);
1226 	lock_buffer(path[level].bp_sib_bh);
1227 
1228 	node = nilfs_btree_get_nonroot_node(btree, path, level);
1229 	left = nilfs_btree_get_sib_node(btree, path, level);
1230 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
1231 	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1232 
1233 	n = (nchildren + lnchildren) / 2 - nchildren;
1234 
1235 	nilfs_btree_node_move_right(btree, left, node, n);
1236 
1237 	if (!buffer_dirty(path[level].bp_bh))
1238 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1239 	if (!buffer_dirty(path[level].bp_sib_bh))
1240 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1241 
1242 	unlock_buffer(path[level].bp_bh);
1243 	unlock_buffer(path[level].bp_sib_bh);
1244 
1245 	nilfs_btree_promote_key(btree, path, level + 1,
1246 				nilfs_btree_node_get_key(btree, node, 0));
1247 
1248 	brelse(path[level].bp_sib_bh);
1249 	path[level].bp_sib_bh = NULL;
1250 	path[level].bp_index += n;
1251 }
1252 
1253 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1254 				     struct nilfs_btree_path *path,
1255 				     int level, __u64 *keyp, __u64 *ptrp)
1256 {
1257 	struct nilfs_btree_node *node, *right;
1258 	int nchildren, rnchildren, n;
1259 
1260 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1261 
1262 	lock_buffer(path[level].bp_bh);
1263 	lock_buffer(path[level].bp_sib_bh);
1264 
1265 	node = nilfs_btree_get_nonroot_node(btree, path, level);
1266 	right = nilfs_btree_get_sib_node(btree, path, level);
1267 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
1268 	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1269 
1270 	n = (nchildren + rnchildren) / 2 - nchildren;
1271 
1272 	nilfs_btree_node_move_left(btree, node, right, n);
1273 
1274 	if (!buffer_dirty(path[level].bp_bh))
1275 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1276 	if (!buffer_dirty(path[level].bp_sib_bh))
1277 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1278 
1279 	unlock_buffer(path[level].bp_bh);
1280 	unlock_buffer(path[level].bp_sib_bh);
1281 
1282 	path[level + 1].bp_index++;
1283 	nilfs_btree_promote_key(btree, path, level + 1,
1284 				nilfs_btree_node_get_key(btree, right, 0));
1285 	path[level + 1].bp_index--;
1286 
1287 	brelse(path[level].bp_sib_bh);
1288 	path[level].bp_sib_bh = NULL;
1289 }
1290 
1291 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1292 				    struct nilfs_btree_path *path,
1293 				    int level, __u64 *keyp, __u64 *ptrp)
1294 {
1295 	struct nilfs_btree_node *node, *left;
1296 	int n;
1297 
1298 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1299 
1300 	lock_buffer(path[level].bp_bh);
1301 	lock_buffer(path[level].bp_sib_bh);
1302 
1303 	node = nilfs_btree_get_nonroot_node(btree, path, level);
1304 	left = nilfs_btree_get_sib_node(btree, path, level);
1305 
1306 	n = nilfs_btree_node_get_nchildren(btree, node);
1307 
1308 	nilfs_btree_node_move_left(btree, left, node, n);
1309 
1310 	if (!buffer_dirty(path[level].bp_sib_bh))
1311 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1312 
1313 	unlock_buffer(path[level].bp_bh);
1314 	unlock_buffer(path[level].bp_sib_bh);
1315 
1316 	nilfs_btnode_delete(path[level].bp_bh);
1317 	path[level].bp_bh = path[level].bp_sib_bh;
1318 	path[level].bp_sib_bh = NULL;
1319 	path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1320 }
1321 
1322 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1323 				     struct nilfs_btree_path *path,
1324 				     int level, __u64 *keyp, __u64 *ptrp)
1325 {
1326 	struct nilfs_btree_node *node, *right;
1327 	int n;
1328 
1329 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1330 
1331 	lock_buffer(path[level].bp_bh);
1332 	lock_buffer(path[level].bp_sib_bh);
1333 
1334 	node = nilfs_btree_get_nonroot_node(btree, path, level);
1335 	right = nilfs_btree_get_sib_node(btree, path, level);
1336 
1337 	n = nilfs_btree_node_get_nchildren(btree, right);
1338 
1339 	nilfs_btree_node_move_left(btree, node, right, n);
1340 
1341 	if (!buffer_dirty(path[level].bp_bh))
1342 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1343 
1344 	unlock_buffer(path[level].bp_bh);
1345 	unlock_buffer(path[level].bp_sib_bh);
1346 
1347 	nilfs_btnode_delete(path[level].bp_sib_bh);
1348 	path[level].bp_sib_bh = NULL;
1349 	path[level + 1].bp_index++;
1350 }
1351 
1352 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1353 			       struct nilfs_btree_path *path,
1354 			       int level, __u64 *keyp, __u64 *ptrp)
1355 {
1356 	struct nilfs_btree_node *root, *child;
1357 	int n;
1358 
1359 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1360 
1361 	lock_buffer(path[level].bp_bh);
1362 	root = nilfs_btree_get_root(btree);
1363 	child = nilfs_btree_get_nonroot_node(btree, path, level);
1364 
1365 	nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1366 	nilfs_btree_node_set_level(btree, root, level);
1367 	n = nilfs_btree_node_get_nchildren(btree, child);
1368 	nilfs_btree_node_move_left(btree, root, child, n);
1369 	unlock_buffer(path[level].bp_bh);
1370 
1371 	nilfs_btnode_delete(path[level].bp_bh);
1372 	path[level].bp_bh = NULL;
1373 }
1374 
1376 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1377 				      struct nilfs_btree_path *path,
1378 				      int *levelp,
1379 				      struct nilfs_bmap_stats *stats)
1380 {
1381 	struct buffer_head *bh;
1382 	struct nilfs_btree_node *node, *parent, *sib;
1383 	__u64 sibptr;
1384 	int pindex, level, ret;
1385 
1386 	ret = 0;
1387 	stats->bs_nblocks = 0;
1388 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1389 	     level < nilfs_btree_height(btree) - 1;
1390 	     level++) {
1391 		node = nilfs_btree_get_nonroot_node(btree, path, level);
1392 		path[level].bp_oldreq.bpr_ptr =
1393 			nilfs_btree_node_get_ptr(btree, node,
1394 						 path[level].bp_index);
1395 		ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1396 						 &path[level].bp_oldreq);
1397 		if (ret < 0)
1398 			goto err_out_child_node;
1399 
1400 		if (nilfs_btree_node_get_nchildren(btree, node) >
1401 		    nilfs_btree_node_nchildren_min(btree, node)) {
1402 			path[level].bp_op = nilfs_btree_do_delete;
1403 			stats->bs_nblocks++;
1404 			goto out;
1405 		}
1406 
1407 		parent = nilfs_btree_get_node(btree, path, level + 1);
1408 		pindex = path[level + 1].bp_index;
1409 
1410 		if (pindex > 0) {
1411 			/* left sibling */
1412 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1413 							  pindex - 1);
1414 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1415 			if (ret < 0)
1416 				goto err_out_curr_node;
1417 			sib = (struct nilfs_btree_node *)bh->b_data;
1418 			if (nilfs_btree_node_get_nchildren(btree, sib) >
1419 			    nilfs_btree_node_nchildren_min(btree, sib)) {
1420 				path[level].bp_sib_bh = bh;
1421 				path[level].bp_op = nilfs_btree_borrow_left;
1422 				stats->bs_nblocks++;
1423 				goto out;
1424 			} else {
1425 				path[level].bp_sib_bh = bh;
1426 				path[level].bp_op = nilfs_btree_concat_left;
1427 				stats->bs_nblocks++;
1428 				/* continue; */
1429 			}
1430 		} else if (pindex <
1431 			   nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1432 			/* right sibling */
1433 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1434 							  pindex + 1);
1435 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1436 			if (ret < 0)
1437 				goto err_out_curr_node;
1438 			sib = (struct nilfs_btree_node *)bh->b_data;
1439 			if (nilfs_btree_node_get_nchildren(btree, sib) >
1440 			    nilfs_btree_node_nchildren_min(btree, sib)) {
1441 				path[level].bp_sib_bh = bh;
1442 				path[level].bp_op = nilfs_btree_borrow_right;
1443 				stats->bs_nblocks++;
1444 				goto out;
1445 			} else {
1446 				path[level].bp_sib_bh = bh;
1447 				path[level].bp_op = nilfs_btree_concat_right;
1448 				stats->bs_nblocks++;
1449 				/* continue; */
1450 			}
1451 		} else {
1452 			/* no siblings */
1453 			/* the only child of the root node */
1454 			WARN_ON(level != nilfs_btree_height(btree) - 2);
1455 			if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1456 			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1457 				path[level].bp_op = nilfs_btree_shrink;
1458 				stats->bs_nblocks += 2;
1459 			} else {
1460 				path[level].bp_op = nilfs_btree_do_delete;
1461 				stats->bs_nblocks++;
1462 			}
1463 
1464 			goto out;
1465 
1466 		}
1467 	}
1468 
1469 	node = nilfs_btree_get_root(btree);
1470 	path[level].bp_oldreq.bpr_ptr =
1471 		nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1472 
1473 	ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1474 					 &path[level].bp_oldreq);
1475 	if (ret < 0)
1476 		goto err_out_child_node;
1477 
1478 	/* child of the root node is deleted */
1479 	path[level].bp_op = nilfs_btree_do_delete;
1480 	stats->bs_nblocks++;
1481 
1482 	/* success */
1483  out:
1484 	*levelp = level;
1485 	return ret;
1486 
1487 	/* error */
1488  err_out_curr_node:
1489 	nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq);
1490  err_out_child_node:
1491 	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1492 		brelse(path[level].bp_sib_bh);
1493 		nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1494 					 &path[level].bp_oldreq);
1495 	}
1496 	*levelp = level;
1497 	stats->bs_nblocks = 0;
1498 	return ret;
1499 }
1500 
1501 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1502 				      struct nilfs_btree_path *path,
1503 				      int maxlevel)
1504 {
1505 	int level;
1506 
1507 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1508 		nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1509 					  &path[level].bp_oldreq);
1510 		path[level].bp_op(btree, path, level, NULL, NULL);
1511 	}
1512 
1513 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1514 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1515 }
1516 
1517 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1519 {
1520 	struct nilfs_btree *btree;
1521 	struct nilfs_btree_path *path;
1522 	struct nilfs_bmap_stats stats;
1523 	int level, ret;
1524 
1525 	btree = (struct nilfs_btree *)bmap;
1526 	path = nilfs_btree_alloc_path(btree);
1527 	if (path == NULL)
1528 		return -ENOMEM;
1529 	nilfs_btree_init_path(btree, path);
1530 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1531 				    NILFS_BTREE_LEVEL_NODE_MIN);
1532 	if (ret < 0)
1533 		goto out;
1534 
1535 	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1536 	if (ret < 0)
1537 		goto out;
1538 	nilfs_btree_commit_delete(btree, path, level);
1539 	nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1540 
1541 out:
1542 	nilfs_btree_clear_path(btree, path);
1543 	nilfs_btree_free_path(btree, path);
1544 	return ret;
1545 }
1546 
1547 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1548 {
1549 	struct nilfs_btree *btree;
1550 	struct nilfs_btree_path *path;
1551 	int ret;
1552 
1553 	btree = (struct nilfs_btree *)bmap;
1554 	path = nilfs_btree_alloc_path(btree);
1555 	if (path == NULL)
1556 		return -ENOMEM;
1557 	nilfs_btree_init_path(btree, path);
1558 
1559 	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1560 
1561 	nilfs_btree_clear_path(btree, path);
1562 	nilfs_btree_free_path(btree, path);
1563 
1564 	return ret;
1565 }
1566 
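/*
 * Check whether @key is the largest key in the tree and whether the next
 * largest key stays below NILFS_BMAP_LARGE_LOW once it is removed; the
 * caller can then decide whether the B-tree may shrink back to a direct
 * mapping after deleting @key.
 */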
1567 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1568 {
1569 	struct buffer_head *bh;
1570 	struct nilfs_btree *btree;
1571 	struct nilfs_btree_node *root, *node;
1572 	__u64 maxkey, nextmaxkey;
1573 	__u64 ptr;
1574 	int nchildren, ret;
1575 
1576 	btree = (struct nilfs_btree *)bmap;
1577 	root = nilfs_btree_get_root(btree);
1578 	switch (nilfs_btree_height(btree)) {
1579 	case 2:
1580 		bh = NULL;
1581 		node = root;
1582 		break;
1583 	case 3:
1584 		nchildren = nilfs_btree_node_get_nchildren(btree, root);
1585 		if (nchildren > 1)
1586 			return 0;
1587 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1588 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1589 		if (ret < 0)
1590 			return ret;
1591 		node = (struct nilfs_btree_node *)bh->b_data;
1592 		break;
1593 	default:
1594 		return 0;
1595 	}
1596 
1597 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
1598 	maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1599 	nextmaxkey = (nchildren > 1) ?
1600 		nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1601 	if (bh != NULL)
1602 		brelse(bh);
1603 
1604 	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1605 }
1606 
1607 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1608 				   __u64 *keys, __u64 *ptrs, int nitems)
1609 {
1610 	struct buffer_head *bh;
1611 	struct nilfs_btree *btree;
1612 	struct nilfs_btree_node *node, *root;
1613 	__le64 *dkeys;
1614 	__le64 *dptrs;
1615 	__u64 ptr;
1616 	int nchildren, i, ret;
1617 
1618 	btree = (struct nilfs_btree *)bmap;
1619 	root = nilfs_btree_get_root(btree);
1620 	switch (nilfs_btree_height(btree)) {
1621 	case 2:
1622 		bh = NULL;
1623 		node = root;
1624 		break;
1625 	case 3:
1626 		nchildren = nilfs_btree_node_get_nchildren(btree, root);
1627 		WARN_ON(nchildren > 1);
1628 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1629 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1630 		if (ret < 0)
1631 			return ret;
1632 		node = (struct nilfs_btree_node *)bh->b_data;
1633 		break;
1634 	default:
1635 		node = NULL;
1636 		return -EINVAL;
1637 	}
1638 
1639 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
1640 	if (nchildren < nitems)
1641 		nitems = nchildren;
1642 	dkeys = nilfs_btree_node_dkeys(btree, node);
1643 	dptrs = nilfs_btree_node_dptrs(btree, node);
1644 	for (i = 0; i < nitems; i++) {
1645 		keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1646 		ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1647 	}
1648 
1649 	if (bh != NULL)
1650 		brelse(bh);
1651 
1652 	return nitems;
1653 }
1654 
1655 static int
1656 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1657 				       union nilfs_bmap_ptr_req *dreq,
1658 				       union nilfs_bmap_ptr_req *nreq,
1659 				       struct buffer_head **bhp,
1660 				       struct nilfs_bmap_stats *stats)
1661 {
1662 	struct buffer_head *bh;
1663 	struct nilfs_btree *btree;
1664 	int ret;
1665 
1666 	btree = (struct nilfs_btree *)bmap;
1667 	stats->bs_nblocks = 0;
1668 
1669 	/* for data */
1670 	/* cannot find near ptr */
1671 	if (NILFS_BMAP_USE_VBN(bmap))
1672 		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1673 
1674 	ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq);
1675 	if (ret < 0)
1676 		return ret;
1677 
1678 	*bhp = NULL;
1679 	stats->bs_nblocks++;
1680 	if (nreq != NULL) {
1681 		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1682 		ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq);
1683 		if (ret < 0)
1684 			goto err_out_dreq;
1685 
1686 		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1687 		if (ret < 0)
1688 			goto err_out_nreq;
1689 
1690 		*bhp = bh;
1691 		stats->bs_nblocks++;
1692 	}
1693 
1694 	/* success */
1695 	return 0;
1696 
1697 	/* error */
1698  err_out_nreq:
1699 	nilfs_bmap_abort_alloc_ptr(bmap, nreq);
1700  err_out_dreq:
1701 	nilfs_bmap_abort_alloc_ptr(bmap, dreq);
1702 	stats->bs_nblocks = 0;
1703 	return ret;
1704 
1705 }
1706 
1707 static void
1708 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1709 				      __u64 key, __u64 ptr,
1710 				      const __u64 *keys, const __u64 *ptrs,
1711 				      int n,
1712 				      union nilfs_bmap_ptr_req *dreq,
1713 				      union nilfs_bmap_ptr_req *nreq,
1714 				      struct buffer_head *bh)
1715 {
1716 	struct nilfs_btree *btree;
1717 	struct nilfs_btree_node *node;
1718 	__u64 tmpptr;
1719 
1720 	/* free resources */
1721 	if (bmap->b_ops->bop_clear != NULL)
1722 		bmap->b_ops->bop_clear(bmap);
1723 
1724 	/* ptr must be a pointer to a buffer head. */
1725 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1726 
1727 	/* convert and insert */
1728 	btree = (struct nilfs_btree *)bmap;
1729 	nilfs_btree_init(bmap);
1730 	if (nreq != NULL) {
1731 		nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1732 		nilfs_bmap_commit_alloc_ptr(bmap, nreq);
1733 
1734 		/* create child node at level 1 */
1735 		lock_buffer(bh);
1736 		node = (struct nilfs_btree_node *)bh->b_data;
1737 		nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1738 		nilfs_btree_node_insert(btree, node,
1739 					key, dreq->bpr_ptr, n);
1740 		if (!buffer_dirty(bh))
1741 			nilfs_btnode_mark_dirty(bh);
1742 		if (!nilfs_bmap_dirty(bmap))
1743 			nilfs_bmap_set_dirty(bmap);
1744 
1745 		unlock_buffer(bh);
1746 		brelse(bh);
1747 
1748 		/* create root node at level 2 */
1749 		node = nilfs_btree_get_root(btree);
1750 		tmpptr = nreq->bpr_ptr;
1751 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1752 				      2, 1, &keys[0], &tmpptr);
1753 	} else {
1754 		nilfs_bmap_commit_alloc_ptr(bmap, dreq);
1755 
1756 		/* create root node at level 1 */
1757 		node = nilfs_btree_get_root(btree);
1758 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1759 				      1, n, keys, ptrs);
1760 		nilfs_btree_node_insert(btree, node,
1761 					key, dreq->bpr_ptr, n);
1762 		if (!nilfs_bmap_dirty(bmap))
1763 			nilfs_bmap_set_dirty(bmap);
1764 	}
1765 
1766 	if (NILFS_BMAP_USE_VBN(bmap))
1767 		nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1768 }
1769 
1770 /**
1771  * nilfs_btree_convert_and_insert - convert a bmap to a B-tree and insert a new entry
1772  * @bmap: bmap to be converted into a B-tree
1773  * @key: key of the new entry
1774  * @ptr: pointer of the new entry
1775  * @keys: keys of the entries already held by @bmap
1776  * @ptrs: pointers of the entries already held by @bmap
1777  * @n: number of entries in @keys and @ptrs
1778  */
1779 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1780 				   __u64 key, __u64 ptr,
1781 				   const __u64 *keys, const __u64 *ptrs, int n)
1782 {
1783 	struct buffer_head *bh;
1784 	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1785 	struct nilfs_bmap_stats stats;
1786 	int ret;
1787 
1788 	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1789 		di = &dreq;
1790 		ni = NULL;
1791 	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1792 			   1 << bmap->b_inode->i_blkbits)) {
1793 		di = &dreq;
1794 		ni = &nreq;
1795 	} else {
1796 		di = NULL;
1797 		ni = NULL;
1798 		BUG();
1799 	}
1800 
1801 	ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1802 						     &stats);
1803 	if (ret < 0)
1804 		return ret;
1805 	nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1806 					      di, ni, bh);
1807 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1808 	return 0;
1809 }
1810 
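/*
 * Dirty-state propagation for a B-tree using physical block addresses:
 * mark each clean ancestor node buffer dirty, stopping at the first
 * ancestor that is already dirty or at the root level.
 */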
1811 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1812 				   struct nilfs_btree_path *path,
1813 				   int level,
1814 				   struct buffer_head *bh)
1815 {
1816 	while ((++level < nilfs_btree_height(btree) - 1) &&
1817 	       !buffer_dirty(path[level].bp_bh))
1818 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1819 
1820 	return 0;
1821 }
1822 
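/*
 * For a B-tree using virtual block addresses, a node that is about to be
 * modified gets a new virtual address.  Prepare that address change for the
 * pointer stored in the parent node and, if the buffer is a btnode, prepare
 * rekeying it in the btnode cache accordingly.
 */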
1823 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1824 					struct nilfs_btree_path *path,
1825 					int level)
1826 {
1827 	struct nilfs_btree_node *parent;
1828 	int ret;
1829 
1830 	parent = nilfs_btree_get_node(btree, path, level + 1);
1831 	path[level].bp_oldreq.bpr_ptr =
1832 		nilfs_btree_node_get_ptr(btree, parent,
1833 					 path[level + 1].bp_index);
1834 	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1835 	ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap,
1836 					  &path[level].bp_oldreq,
1837 					  &path[level].bp_newreq);
1838 	if (ret < 0)
1839 		return ret;
1840 
1841 	if (buffer_nilfs_node(path[level].bp_bh)) {
1842 		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1843 		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1844 		path[level].bp_ctxt.bh = path[level].bp_bh;
1845 		ret = nilfs_btnode_prepare_change_key(
1846 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1847 			&path[level].bp_ctxt);
1848 		if (ret < 0) {
1849 			nilfs_bmap_abort_update_v(&btree->bt_bmap,
1850 						  &path[level].bp_oldreq,
1851 						  &path[level].bp_newreq);
1852 			return ret;
1853 		}
1854 	}
1855 
1856 	return 0;
1857 }
1858 
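/*
 * Commit a prepared virtual address change: finalize it in the bmap,
 * complete the btnode rekeying if one was prepared, mark the buffer
 * volatile, and make the parent node point to the new address.
 */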
1859 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1860 					struct nilfs_btree_path *path,
1861 					int level)
1862 {
1863 	struct nilfs_btree_node *parent;
1864 
1865 	nilfs_bmap_commit_update_v(&btree->bt_bmap,
1866 				   &path[level].bp_oldreq,
1867 				   &path[level].bp_newreq);
1868 
1869 	if (buffer_nilfs_node(path[level].bp_bh)) {
1870 		nilfs_btnode_commit_change_key(
1871 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1872 			&path[level].bp_ctxt);
1873 		path[level].bp_bh = path[level].bp_ctxt.bh;
1874 	}
1875 	set_buffer_nilfs_volatile(path[level].bp_bh);
1876 
1877 	parent = nilfs_btree_get_node(btree, path, level + 1);
1878 	nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1879 				 path[level].bp_newreq.bpr_ptr);
1880 }
1881 
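/* Cancel an address change prepared by nilfs_btree_prepare_update_v(). */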
1882 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1883 				       struct nilfs_btree_path *path,
1884 				       int level)
1885 {
1886 	nilfs_bmap_abort_update_v(&btree->bt_bmap,
1887 				  &path[level].bp_oldreq,
1888 				  &path[level].bp_newreq);
1889 	if (buffer_nilfs_node(path[level].bp_bh))
1890 		nilfs_btnode_abort_change_key(
1891 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1892 			&path[level].bp_ctxt);
1893 }
1894 
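/*
 * Prepare virtual address changes from @minlevel upward, stopping below the
 * root or at the first already-dirty node.  On success *maxlevelp is set to
 * the highest prepared level; on failure every prepared level is aborted.
 */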
1895 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1896 					   struct nilfs_btree_path *path,
1897 					   int minlevel,
1898 					   int *maxlevelp)
1899 {
1900 	int level, ret;
1901 
1902 	level = minlevel;
1903 	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1904 		ret = nilfs_btree_prepare_update_v(btree, path, level);
1905 		if (ret < 0)
1906 			return ret;
1907 	}
1908 	while ((++level < nilfs_btree_height(btree) - 1) &&
1909 	       !buffer_dirty(path[level].bp_bh)) {
1911 		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1912 		ret = nilfs_btree_prepare_update_v(btree, path, level);
1913 		if (ret < 0)
1914 			goto out;
1915 	}
1916 
1917 	/* success */
1918 	*maxlevelp = level - 1;
1919 	return 0;
1920 
1921 	/* error */
1922  out:
1923 	while (--level > minlevel)
1924 		nilfs_btree_abort_update_v(btree, path, level);
1925 	if (!buffer_nilfs_volatile(path[level].bp_bh))
1926 		nilfs_btree_abort_update_v(btree, path, level);
1927 	return ret;
1928 }
1929 
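/* Commit the address changes prepared between @minlevel and @maxlevel. */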
1930 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1931 					   struct nilfs_btree_path *path,
1932 					   int minlevel,
1933 					   int maxlevel,
1934 					   struct buffer_head *bh)
1935 {
1936 	int level;
1937 
1938 	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1939 		nilfs_btree_commit_update_v(btree, path, minlevel);
1940 
1941 	for (level = minlevel + 1; level <= maxlevel; level++)
1942 		nilfs_btree_commit_update_v(btree, path, level);
1943 }
1944 
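/*
 * Dirty-state propagation for a B-tree using virtual block addresses:
 * assign new virtual addresses to the ancestors that need them, and if the
 * buffer itself is already volatile, simply mark its current address dirty.
 */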
1945 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1946 				   struct nilfs_btree_path *path,
1947 				   int level,
1948 				   struct buffer_head *bh)
1949 {
1950 	int maxlevel, ret;
1951 	struct nilfs_btree_node *parent;
1952 	__u64 ptr;
1953 
1954 	get_bh(bh);
1955 	path[level].bp_bh = bh;
1956 	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1957 	if (ret < 0)
1958 		goto out;
1959 
1960 	if (buffer_nilfs_volatile(path[level].bp_bh)) {
1961 		parent = nilfs_btree_get_node(btree, path, level + 1);
1962 		ptr = nilfs_btree_node_get_ptr(btree, parent,
1963 					       path[level + 1].bp_index);
1964 		ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1965 		if (ret < 0)
1966 			goto out;
1967 	}
1968 
1969 	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1970 
1971  out:
1972 	brelse(path[level].bp_bh);
1973 	path[level].bp_bh = NULL;
1974 	return ret;
1975 }
1976 
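/*
 * Propagate the dirty state of a data block or btnode buffer up the B-tree.
 * The path is rebuilt from the buffer's key and level, then the physical or
 * virtual variant is used depending on the bmap type.
 */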
1977 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1978 				 struct buffer_head *bh)
1979 {
1980 	struct nilfs_btree *btree;
1981 	struct nilfs_btree_path *path;
1982 	struct nilfs_btree_node *node;
1983 	__u64 key;
1984 	int level, ret;
1985 
1986 	WARN_ON(!buffer_dirty(bh));
1987 
1988 	btree = (struct nilfs_btree *)bmap;
1989 	path = nilfs_btree_alloc_path(btree);
1990 	if (path == NULL)
1991 		return -ENOMEM;
1992 	nilfs_btree_init_path(btree, path);
1993 
1994 	if (buffer_nilfs_node(bh)) {
1995 		node = (struct nilfs_btree_node *)bh->b_data;
1996 		key = nilfs_btree_node_get_key(btree, node, 0);
1997 		level = nilfs_btree_node_get_level(btree, node);
1998 	} else {
1999 		key = nilfs_bmap_data_get_key(bmap, bh);
2000 		level = NILFS_BTREE_LEVEL_DATA;
2001 	}
2002 
2003 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2004 	if (ret < 0) {
2005 		if (unlikely(ret == -ENOENT))
2006 			printk(KERN_CRIT "%s: key = %llu, level = %d\n",
2007 			       __func__, (unsigned long long)key, level);
2008 		goto out;
2009 	}
2010 
2011 	ret = NILFS_BMAP_USE_VBN(bmap) ?
2012 		nilfs_btree_propagate_v(btree, path, level, bh) :
2013 		nilfs_btree_propagate_p(btree, path, level, bh);
2014 
2015  out:
2016 	nilfs_btree_clear_path(btree, path);
2017 	nilfs_btree_free_path(btree, path);
2018 
2019 	return ret;
2020 }
2021 
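/* For GC, propagation reduces to marking the buffer's block address dirty. */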
2022 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
2023 				    struct buffer_head *bh)
2024 {
2025 	return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
2026 }
2027 
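/*
 * Add a dirty btnode buffer to the per-level list in @lists, keeping each
 * list sorted by the first key of its node.
 */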
2028 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
2029 					 struct list_head *lists,
2030 					 struct buffer_head *bh)
2031 {
2032 	struct list_head *head;
2033 	struct buffer_head *cbh;
2034 	struct nilfs_btree_node *node, *cnode;
2035 	__u64 key, ckey;
2036 	int level;
2037 
2038 	get_bh(bh);
2039 	node = (struct nilfs_btree_node *)bh->b_data;
2040 	key = nilfs_btree_node_get_key(btree, node, 0);
2041 	level = nilfs_btree_node_get_level(btree, node);
2042 	list_for_each(head, &lists[level]) {
2043 		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2044 		cnode = (struct nilfs_btree_node *)cbh->b_data;
2045 		ckey = nilfs_btree_node_get_key(btree, cnode, 0);
2046 		if (key < ckey)
2047 			break;
2048 	}
2049 	list_add_tail(&bh->b_assoc_buffers, head);
2050 }
2051 
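/*
 * Collect the dirty btnode buffers of @bmap and append them to @listp,
 * ordered by level (lowest first) and by key within each level.
 */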
2052 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
2053 					     struct list_head *listp)
2054 {
2055 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
2056 	struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
2057 	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2058 	struct pagevec pvec;
2059 	struct buffer_head *bh, *head;
2060 	pgoff_t index = 0;
2061 	int level, i;
2062 
2063 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2064 	     level < NILFS_BTREE_LEVEL_MAX;
2065 	     level++)
2066 		INIT_LIST_HEAD(&lists[level]);
2067 
2068 	pagevec_init(&pvec, 0);
2069 
2070 	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2071 				  PAGEVEC_SIZE)) {
2072 		for (i = 0; i < pagevec_count(&pvec); i++) {
2073 			bh = head = page_buffers(pvec.pages[i]);
2074 			do {
2075 				if (buffer_dirty(bh))
2076 					nilfs_btree_add_dirty_buffer(btree,
2077 								     lists, bh);
2078 			} while ((bh = bh->b_this_page) != head);
2079 		}
2080 		pagevec_release(&pvec);
2081 		cond_resched();
2082 	}
2083 
2084 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2085 	     level < NILFS_BTREE_LEVEL_MAX;
2086 	     level++)
2087 		list_splice(&lists[level], listp->prev);
2088 }
2089 
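/*
 * Assign a physical block address at segment construction time: rekey the
 * btnode buffer to the new block number if necessary, update the parent's
 * pointer, and fill in the on-disk binfo (block offset and level).
 */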
2090 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2091 				struct nilfs_btree_path *path,
2092 				int level,
2093 				struct buffer_head **bh,
2094 				sector_t blocknr,
2095 				union nilfs_binfo *binfo)
2096 {
2097 	struct nilfs_btree_node *parent;
2098 	__u64 key;
2099 	__u64 ptr;
2100 	int ret;
2101 
2102 	parent = nilfs_btree_get_node(btree, path, level + 1);
2103 	ptr = nilfs_btree_node_get_ptr(btree, parent,
2104 				       path[level + 1].bp_index);
2105 	if (buffer_nilfs_node(*bh)) {
2106 		path[level].bp_ctxt.oldkey = ptr;
2107 		path[level].bp_ctxt.newkey = blocknr;
2108 		path[level].bp_ctxt.bh = *bh;
2109 		ret = nilfs_btnode_prepare_change_key(
2110 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2111 			&path[level].bp_ctxt);
2112 		if (ret < 0)
2113 			return ret;
2114 		nilfs_btnode_commit_change_key(
2115 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2116 			&path[level].bp_ctxt);
2117 		*bh = path[level].bp_ctxt.bh;
2118 	}
2119 
2120 	nilfs_btree_node_set_ptr(btree, parent,
2121 				 path[level + 1].bp_index, blocknr);
2122 
2123 	key = nilfs_btree_node_get_key(btree, parent,
2124 				       path[level + 1].bp_index);
2125 	/* on-disk format */
2126 	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2127 	binfo->bi_dat.bi_level = level;
2128 
2129 	return 0;
2130 }
2131 
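/*
 * Assign a block address to a virtually mapped block: bind the virtual
 * block number held in the parent node to @blocknr and fill in the on-disk
 * binfo (virtual block number and block offset).
 */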
2132 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2133 				struct nilfs_btree_path *path,
2134 				int level,
2135 				struct buffer_head **bh,
2136 				sector_t blocknr,
2137 				union nilfs_binfo *binfo)
2138 {
2139 	struct nilfs_btree_node *parent;
2140 	__u64 key;
2141 	__u64 ptr;
2142 	union nilfs_bmap_ptr_req req;
2143 	int ret;
2144 
2145 	parent = nilfs_btree_get_node(btree, path, level + 1);
2146 	ptr = nilfs_btree_node_get_ptr(btree, parent,
2147 				       path[level + 1].bp_index);
2148 	req.bpr_ptr = ptr;
2149 	ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
2150 	if (unlikely(ret < 0))
2151 		return ret;
2152 
2153 	key = nilfs_btree_node_get_key(btree, parent,
2154 				       path[level + 1].bp_index);
2155 	/* on-disk format */
2156 	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2157 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2158 
2159 	return 0;
2160 }
2161 
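/*
 * Assign a disk block address to the buffer being written out.  The path is
 * rebuilt from the buffer's key and level, then the physical or virtual
 * variant is used depending on the bmap type.
 */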
2162 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2163 			      struct buffer_head **bh,
2164 			      sector_t blocknr,
2165 			      union nilfs_binfo *binfo)
2166 {
2167 	struct nilfs_btree *btree;
2168 	struct nilfs_btree_path *path;
2169 	struct nilfs_btree_node *node;
2170 	__u64 key;
2171 	int level, ret;
2172 
2173 	btree = (struct nilfs_btree *)bmap;
2174 	path = nilfs_btree_alloc_path(btree);
2175 	if (path == NULL)
2176 		return -ENOMEM;
2177 	nilfs_btree_init_path(btree, path);
2178 
2179 	if (buffer_nilfs_node(*bh)) {
2180 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2181 		key = nilfs_btree_node_get_key(btree, node, 0);
2182 		level = nilfs_btree_node_get_level(btree, node);
2183 	} else {
2184 		key = nilfs_bmap_data_get_key(bmap, *bh);
2185 		level = NILFS_BTREE_LEVEL_DATA;
2186 	}
2187 
2188 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2189 	if (ret < 0) {
2190 		WARN_ON(ret == -ENOENT);
2191 		goto out;
2192 	}
2193 
2194 	ret = NILFS_BMAP_USE_VBN(bmap) ?
2195 		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2196 		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2197 
2198  out:
2199 	nilfs_btree_clear_path(btree, path);
2200 	nilfs_btree_free_path(btree, path);
2201 
2202 	return ret;
2203 }
2204 
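/*
 * GC variant of block assignment: move the virtual block number of the
 * buffer to @blocknr and record it together with the key in the on-disk
 * binfo.
 */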
2205 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2206 				 struct buffer_head **bh,
2207 				 sector_t blocknr,
2208 				 union nilfs_binfo *binfo)
2209 {
2210 	struct nilfs_btree *btree;
2211 	struct nilfs_btree_node *node;
2212 	__u64 key;
2213 	int ret;
2214 
2215 	btree = (struct nilfs_btree *)bmap;
2216 	ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2217 	if (ret < 0)
2218 		return ret;
2219 
2220 	if (buffer_nilfs_node(*bh)) {
2221 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2222 		key = nilfs_btree_node_get_key(btree, node, 0);
2223 	} else
2224 		key = nilfs_bmap_data_get_key(bmap, *bh);
2225 
2226 	/* on-disk format */
2227 	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2228 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2229 
2230 	return 0;
2231 }
2232 
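/*
 * Mark the node block found at @key and @level dirty so that it will be
 * written out by the next segment construction.
 */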
2233 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2234 {
2235 	struct buffer_head *bh;
2236 	struct nilfs_btree *btree;
2237 	struct nilfs_btree_path *path;
2238 	__u64 ptr;
2239 	int ret;
2240 
2241 	btree = (struct nilfs_btree *)bmap;
2242 	path = nilfs_btree_alloc_path(btree);
2243 	if (path == NULL)
2244 		return -ENOMEM;
2245 	nilfs_btree_init_path(btree, path);
2246 
2247 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2248 	if (ret < 0) {
2249 		WARN_ON(ret == -ENOENT);
2250 		goto out;
2251 	}
2252 	ret = nilfs_btree_get_block(btree, ptr, &bh);
2253 	if (ret < 0) {
2254 		WARN_ON(ret == -ENOENT);
2255 		goto out;
2256 	}
2257 
2258 	if (!buffer_dirty(bh))
2259 		nilfs_btnode_mark_dirty(bh);
2260 	brelse(bh);
2261 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
2262 		nilfs_bmap_set_dirty(&btree->bt_bmap);
2263 
2264  out:
2265 	nilfs_btree_clear_path(btree, path);
2266 	nilfs_btree_free_path(btree, path);
2267 	return ret;
2268 }
2269 
2270 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2271 	.bop_lookup		=	nilfs_btree_lookup,
2272 	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2273 	.bop_insert		=	nilfs_btree_insert,
2274 	.bop_delete		=	nilfs_btree_delete,
2275 	.bop_clear		=	NULL,
2276 
2277 	.bop_propagate		=	nilfs_btree_propagate,
2278 
2279 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2280 
2281 	.bop_assign		=	nilfs_btree_assign,
2282 	.bop_mark		=	nilfs_btree_mark,
2283 
2284 	.bop_last_key		=	nilfs_btree_last_key,
2285 	.bop_check_insert	=	NULL,
2286 	.bop_check_delete	=	nilfs_btree_check_delete,
2287 	.bop_gather_data	=	nilfs_btree_gather_data,
2288 };
2289 
2290 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2291 	.bop_lookup		=	NULL,
2292 	.bop_lookup_contig	=	NULL,
2293 	.bop_insert		=	NULL,
2294 	.bop_delete		=	NULL,
2295 	.bop_clear		=	NULL,
2296 
2297 	.bop_propagate		=	nilfs_btree_propagate_gc,
2298 
2299 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2300 
2301 	.bop_assign		=	nilfs_btree_assign_gc,
2302 	.bop_mark		=	NULL,
2303 
2304 	.bop_last_key		=	NULL,
2305 	.bop_check_insert	=	NULL,
2306 	.bop_check_delete	=	NULL,
2307 	.bop_gather_data	=	NULL,
2308 };
2309 
2310 int nilfs_btree_init(struct nilfs_bmap *bmap)
2311 {
2312 	bmap->b_ops = &nilfs_btree_ops;
2313 	return 0;
2314 }
2315 
2316 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2317 {
2318 	bmap->b_ops = &nilfs_btree_ops_gc;
2319 }
2320