xref: /linux/fs/reiserfs/fix_node.c (revision fec6d055da71fb02a76f9c2c12427fa79974018b)
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
5 /**
6  ** old_item_num
7  ** old_entry_num
8  ** set_entry_sizes
9  ** create_virtual_node
10  ** check_left
11  ** check_right
12  ** directory_part_size
13  ** get_num_ver
14  ** set_parameters
15  ** is_leaf_removable
16  ** are_leaves_removable
17  ** get_empty_nodes
18  ** get_lfree
19  ** get_rfree
20  ** is_left_neighbor_in_cache
21  ** decrement_key
22  ** get_far_parent
23  ** get_parents
24  ** can_node_be_removed
25  ** ip_check_balance
26  ** dc_check_balance_internal
27  ** dc_check_balance_leaf
28  ** dc_check_balance
29  ** check_balance
30  ** get_direct_parent
31  ** get_neighbors
32  ** fix_nodes
33  **
34  **
35  **/
36 
37 #include <linux/time.h>
38 #include <linux/string.h>
39 #include <linux/reiserfs_fs.h>
40 #include <linux/buffer_head.h>
41 
42 /* To make any changes in the tree we find a node, that contains item
43    to be changed/deleted or position in the node we insert a new item
44    to. We call this node S. To do balancing we need to decide what we
45    will shift to left/right neighbor, or to a new node, where new item
46    will be etc. To make this analysis simpler we build virtual
47    node. Virtual node is an array of items, that will replace items of
48    node S. (For instance if we are going to delete an item, virtual
49    node does not contain it). Virtual node keeps information about
50    item sizes and types, mergeability of first and last items, sizes
51    of all entries in directory item. We use this array of items when
52    calculating what we can shift to neighbors and how many nodes we
53    have to have if we do not any shiftings, if we shift to left/right
54    neighbor or to both. */
55 
56 /* taking item number in virtual node, returns number of item, that it has in source buffer */
57 static inline int old_item_num(int new_num, int affected_item_num, int mode)
58 {
59 	if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
60 		return new_num;
61 
62 	if (mode == M_INSERT) {
63 
64 		RFALSE(new_num == 0,
65 		       "vs-8005: for INSERT mode and item number of inserted item");
66 
67 		return new_num - 1;
68 	}
69 
70 	RFALSE(mode != M_DELETE,
71 	       "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
72 	       mode);
73 	/* delete mode */
74 	return new_num + 1;
75 }
76 
/* Build the virtual node for level h: an array of virtual items
 * describing what S[h] will contain after the pending operation
 * (vn->vn_mode: insert/paste/cut/delete) is applied.  For internal
 * levels (h > 0) only vn->vn_size and vn->vn_nr_item are computed.
 * Also marks whether the boundary items could merge with the
 * neighbors' boundary items (VI_TYPE_*_MERGEABLE flags). */
static void create_virtual_node(struct tree_balance *tb, int h)
{
	struct item_head *ih;
	struct virtual_node *vn = tb->tb_vn;
	int new_num;
	struct buffer_head *Sh;	/* this comes from tb->S[h] */

	Sh = PATH_H_PBUFFER(tb->tb_path, h);

	/* size of changed node: currently used space plus the (possibly
	   negative) size delta of the pending operation */
	vn->vn_size =
	    MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];

	/* for internal nodes the array of virtual items is not created;
	   only the resulting number of key/pointer pairs is needed */
	if (h) {
		vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* number of items in virtual node: one more on insert, one
	   fewer on delete */
	vn->vn_nr_item =
	    B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
	    ((vn->vn_mode == M_DELETE) ? 1 : 0);

	/* first virtual item; the array lives right after the
	   virtual_node header in tb's scratch buffer */
	vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
	memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
	vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);

	/* first item in the node */
	ih = B_N_PITEM_HEAD(Sh, 0);

	/* define the mergeability for 0-th item (if it is not being deleted) */
	if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
	    && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
		vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;

	/* go through all items those remain in the virtual node (except for the new (inserted) one) */
	for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
		int j;
		struct virtual_item *vi = vn->vn_vi + new_num;
		int is_affected =
		    ((new_num != vn->vn_affected_item_num) ? 0 : 1);

		/* an inserted item does not exist in the source buffer;
		   it is filled in separately below */
		if (is_affected && vn->vn_mode == M_INSERT)
			continue;

		/* get item number in source node */
		j = old_item_num(new_num, vn->vn_affected_item_num,
				 vn->vn_mode);

		vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
		vi->vi_ih = ih + j;
		vi->vi_item = B_I_PITEM(Sh, ih + j);
		vi->vi_uarea = vn->vn_free_ptr;

		// FIXME: there is no check, that item operation did not
		// consume too much memory
		vn->vn_free_ptr +=
		    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
		if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
			reiserfs_panic(tb->tb_sb,
				       "vs-8030: create_virtual_node: "
				       "virtual node space consumed");

		if (!is_affected)
			/* this is not being changed */
			continue;

		/* paste/cut change the size of the affected item */
		if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
			vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
			vi->vi_new_data = vn->vn_data;	// pointer to data which is going to be pasted
		}
	}

	/* virtual inserted item is not defined yet */
	if (vn->vn_mode == M_INSERT) {
		struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;

		RFALSE(vn->vn_ins_ih == 0,
		       "vs-8040: item header of inserted item is not specified");
		vi->vi_item_len = tb->insert_size[0];
		vi->vi_ih = vn->vn_ins_ih;
		vi->vi_item = vn->vn_data;
		vi->vi_uarea = vn->vn_free_ptr;

		op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
			     tb->insert_size[0]);
	}

	/* set right merge flag: we take right delimiting key and check
	   whether it is mergeable with our last item */
	if (tb->CFR[0]) {
		struct reiserfs_key *key;

		key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
		if (op_is_left_mergeable(key, Sh->b_size)
		    && (vn->vn_mode != M_DELETE
			|| vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
			vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
			    VI_TYPE_RIGHT_MERGEABLE;

#ifdef CONFIG_REISERFS_CHECK
		if (op_is_left_mergeable(key, Sh->b_size) &&
		    !(vn->vn_mode != M_DELETE
		      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
			/* we delete last item and it could be merged with right neighbor's first item */
			if (!
			    (B_NR_ITEMS(Sh) == 1
			     && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
			     && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
				/* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
				print_block(Sh, 0, -1, -1);
				reiserfs_panic(tb->tb_sb,
					       "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
					       key, vn->vn_affected_item_num,
					       vn->vn_mode, M_DELETE);
			}
		}
#endif

	}
}
199 
/* using virtual node check, how many items can be shifted to left
   neighbor.  Output: tb->lnum[h] = number of whole items that fit into
   L[h] given cur_free bytes of free space there; on the leaf level
   tb->lbytes = number of units of the first non-fitting item that can
   still be shifted, or -1 if the boundary item cannot be split. */
static void check_left(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);

	/* internal level: each shifted pointer costs a delimiting key
	   plus a disk child structure */
	if (h > 0) {
		tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space or nothing to move */
		tb->lnum[h] = 0;
		tb->lbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8055: parent does not exist or invalid");

	/* if the first item merges with L[0]'s last item, its item
	   header is not needed in L[0], saving IH_SIZE */
	vi = vn->vn_vi;
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into L[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8055: invalid mode or balance condition failed");

		tb->lnum[0] = vn->vn_nr_item;
		tb->lbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* first item may be merge with last item in left neighbor */
	if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
		d_size = -((int)IH_SIZE), ih_size = 0;

	/* accumulate whole items from the left edge while they fit;
	   d_size and ih_size are reset to their defaults after the
	   (possibly mergeable) first item */
	tb->lnum[0] = 0;
	for (i = 0; i < vn->vn_nr_item;
	     i++, ih_size = IH_SIZE, d_size = 0, vi++) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->lnum[0]++;
			continue;
		}

		/* the item cannot be shifted entirely, try to split it */
		/* check whether L[0] can hold ih and at least one byte of the item body */
		if (cur_free <= ih_size) {
			/* cannot shift even a part of the current item */
			tb->lbytes = -1;
			return;
		}
		cur_free -= ih_size;

		tb->lbytes = op_check_left(vi, cur_free, 0, 0);
		if (tb->lbytes != -1)
			/* count partially shifted item */
			tb->lnum[0]++;

		break;
	}

	return;
}
279 
/* using virtual node check, how many items can be shifted to right
   neighbor.  Mirror of check_left: fills tb->rnum[h], and on the leaf
   level tb->rbytes (units of the split boundary item, or -1). */
static void check_right(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8070: cur_free < 0");

	/* internal level: each shifted pointer costs a delimiting key
	   plus a disk child structure */
	if (h > 0) {
		tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space  */
		tb->rnum[h] = 0;
		tb->rbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8075: parent does not exist or invalid");

	/* if the last item merges with R[0]'s first item, its item
	   header is not needed in R[0], saving IH_SIZE */
	vi = vn->vn_vi + vn->vn_nr_item - 1;
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into R[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8080: invalid mode or balance condition failed");

		tb->rnum[h] = vn->vn_nr_item;
		tb->rbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* last item may be merge with first item in right neighbor */
	if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
		d_size = -(int)IH_SIZE, ih_size = 0;

	/* accumulate whole items from the right edge while they fit */
	tb->rnum[0] = 0;
	for (i = vn->vn_nr_item - 1; i >= 0;
	     i--, d_size = 0, ih_size = IH_SIZE, vi--) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->rnum[0]++;
			continue;
		}

		/* check whether R[0] can hold ih and at least one byte of the item body */
		if (cur_free <= ih_size) {	/* cannot shift even a part of the current item */
			tb->rbytes = -1;
			return;
		}

		/* R[0] can hold the header of the item and at least one byte of its body */
		cur_free -= ih_size;	/* cur_free is still > 0 */

		tb->rbytes = op_check_right(vi, cur_free);
		if (tb->rbytes != -1)
			/* count partially shifted item */
			tb->rnum[0]++;

		break;
	}

	return;
}
359 
/*
 * Calculate how many nodes the items remaining in S[0] (those not
 * shifted to the neighbors) will occupy, and how they are distributed.
 *
 * from - number of items, which are shifted to left neighbor entirely
 * to - number of item, which are shifted to right neighbor entirely
 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor
 *
 * Results are written into snum012[]: [0..2] = item counts for S[0],
 * S1new, S2new; [3] = s1bytes, [4] = s2bytes (units of the items split
 * between those nodes, -1 if no split).  Returns the number of nodes
 * needed.  If 'flow' is 0, items are never split between nodes unless
 * a single item exceeds the maximum node size. */
static int get_num_ver(int mode, struct tree_balance *tb, int h,
		       int from, int from_bytes,
		       int to, int to_bytes, short *snum012, int flow)
{
	int i;
	int cur_free;
	//    int bytes;
	int units;
	struct virtual_node *vn = tb->tb_vn;
	//    struct virtual_item * vi;

	int total_node_size, max_node_size, current_item_size;
	int needed_nodes;
	int start_item,		/* position of item we start filling node from */
	 end_item,		/* position of item we finish filling node by */
	 start_bytes,		/* number of first bytes (entries for directory) of start_item-th item
				   we do not include into node that is being filled */
	 end_bytes;		/* number of last bytes (entries for directory) of end_item-th item
				   we do not include into node that is being filled */
	int split_item_positions[2];	/* these are positions in virtual item of
					   items, that are split between S[0] and
					   S1new and S1new and S2new */

	split_item_positions[0] = -1;
	split_item_positions[1] = -1;

	/* We only create additional nodes if we are in insert or paste mode
	   or we are in replace mode at the internal level. If h is 0 and
	   the mode is M_REPLACE then in fix_nodes we change the mode to
	   paste or insert before we get here in the code.  */
	RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
	       "vs-8100: insert_size < 0 in overflow");

	max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));

	/* snum012 [0-2] - number of items, that lay
	   to S[0], first new node and second new node */
	snum012[3] = -1;	/* s1bytes */
	snum012[4] = -1;	/* s2bytes */

	/* internal level: pointers are fixed-size, so the node count is
	   a simple division */
	if (h > 0) {
		i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
		if (i == max_node_size)
			return 1;
		return (i / max_node_size + 1);
	}

	/* leaf level */
	needed_nodes = 1;
	total_node_size = 0;
	cur_free = max_node_size;

	// start from 'from'-th item
	start_item = from;
	// skip its first 'start_bytes' units
	start_bytes = ((from_bytes != -1) ? from_bytes : 0);

	// last included item is the 'end_item'-th one
	end_item = vn->vn_nr_item - to - 1;
	// do not count last 'end_bytes' units of 'end_item'-th item
	end_bytes = (to_bytes != -1) ? to_bytes : 0;

	/* go through all item beginning from the start_item-th item and ending by
	   the end_item-th item. Do not count first 'start_bytes' units of
	   'start_item'-th item and last 'end_bytes' of 'end_item'-th item */

	for (i = start_item; i <= end_item; i++) {
		struct virtual_item *vi = vn->vn_vi + i;
		int skip_from_end = ((i == end_item) ? end_bytes : 0);

		RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");

		/* get size of current item */
		current_item_size = vi->vi_item_len;

		/* do not take in calculation head part (from_bytes) of from-th item */
		current_item_size -=
		    op_part_size(vi, 0 /*from start */ , start_bytes);

		/* do not take in calculation tail part of last item */
		current_item_size -=
		    op_part_size(vi, 1 /*from end */ , skip_from_end);

		/* if item fits into current node entierly */
		if (total_node_size + current_item_size <= max_node_size) {
			snum012[needed_nodes - 1]++;
			total_node_size += current_item_size;
			start_bytes = 0;
			continue;
		}

		if (current_item_size > max_node_size) {
			/* virtual item length is longer, than max size of item in
			   a node. It is impossible for direct item */
			RFALSE(is_direct_le_ih(vi->vi_ih),
			       "vs-8110: "
			       "direct item length is %d. It can not be longer than %d",
			       current_item_size, max_node_size);
			/* we will try to split it */
			flow = 1;
		}

		if (!flow) {
			/* as we do not split items, take new node and continue */
			needed_nodes++;
			i--;
			total_node_size = 0;
			continue;
		}
		// calculate number of item units which fit into node being
		// filled
		{
			int free_space;

			free_space = max_node_size - total_node_size - IH_SIZE;
			units =
			    op_check_left(vi, free_space, start_bytes,
					  skip_from_end);
			if (units == -1) {
				/* nothing fits into current node, take new node and continue */
				needed_nodes++, i--, total_node_size = 0;
				continue;
			}
		}

		/* something fits into the current node */
		//if (snum012[3] != -1 || needed_nodes != 1)
		//  reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
		//snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
		start_bytes += units;
		snum012[needed_nodes - 1 + 3] = units;

		if (needed_nodes > 2)
			reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: "
					 "split_item_position is out of boundary");
		snum012[needed_nodes - 1]++;
		split_item_positions[needed_nodes - 1] = i;
		needed_nodes++;
		/* continue from the same item with start_bytes != -1 */
		start_item = i;
		i--;
		total_node_size = 0;
	}

	// sum012[4] (if it is not -1) contains number of units of which
	// are to be in S1new, snum012[3] - to be in S0. They are supposed
	// to be S1bytes and S2bytes correspondingly, so recalculate
	if (snum012[4] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S1new;

		split_item_num = split_item_positions[1];
		/* units of the split item already accounted to the left
		   neighbor, the right neighbor, and S1new respectively */
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S1new =
		    ((split_item_positions[0] ==
		      split_item_positions[1]) ? snum012[3] : 0);

		// s2bytes
		snum012[4] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
		    bytes_to_r - bytes_to_l - bytes_to_S1new;

		if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
		    vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
			reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not "
					 "directory or indirect item");
	}

	/* now we know S2bytes, calculate S1bytes */
	if (snum012[3] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S2new;

		split_item_num = split_item_positions[0];
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S2new =
		    ((split_item_positions[0] == split_item_positions[1]
		      && snum012[4] != -1) ? snum012[4] : 0);

		// s1bytes
		snum012[3] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
		    bytes_to_r - bytes_to_l - bytes_to_S2new;
	}

	return needed_nodes;
}
565 
566 #ifdef CONFIG_REISERFS_CHECK
567 extern struct tree_balance *cur_tb;
568 #endif
569 
570 /* Set parameters for balancing.
571  * Performs write of results of analysis of balancing into structure tb,
572  * where it will later be used by the functions that actually do the balancing.
573  * Parameters:
574  *	tb	tree_balance structure;
575  *	h	current level of the node;
576  *	lnum	number of items from S[h] that must be shifted to L[h];
577  *	rnum	number of items from S[h] that must be shifted to R[h];
578  *	blk_num	number of blocks that S[h] will be splitted into;
579  *	s012	number of items that fall into splitted nodes.
580  *	lbytes	number of bytes which flow to the left neighbor from the item that is not
581  *		not shifted entirely
582  *	rbytes	number of bytes which flow to the right neighbor from the item that is not
583  *		not shifted entirely
584  *	s1bytes	number of bytes which flow to the first  new node when S[0] splits (this number is contained in s012 array)
585  */
586 
587 static void set_parameters(struct tree_balance *tb, int h, int lnum,
588 			   int rnum, int blk_num, short *s012, int lb, int rb)
589 {
590 
591 	tb->lnum[h] = lnum;
592 	tb->rnum[h] = rnum;
593 	tb->blknum[h] = blk_num;
594 
595 	if (h == 0) {		/* only for leaf level */
596 		if (s012 != NULL) {
597 			tb->s0num = *s012++,
598 			    tb->s1num = *s012++, tb->s2num = *s012++;
599 			tb->s1bytes = *s012++;
600 			tb->s2bytes = *s012;
601 		}
602 		tb->lbytes = lb;
603 		tb->rbytes = rb;
604 	}
605 	PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
606 	PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
607 
608 	PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
609 	PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
610 }
611 
/* check, does node disappear if we shift tb->lnum[0] items to left
   neighbor and tb->rnum[0] to the right one.  Returns 1 (and records
   the shift via set_parameters) if S[0] can be emptied, 0 otherwise. */
static int is_leaf_removable(struct tree_balance *tb)
{
	struct virtual_node *vn = tb->tb_vn;
	int to_left, to_right;
	int size;
	int remain_items;

	/* number of items, that will be shifted to left (right) neighbor
	   entirely (a partial shift of a boundary item does not count) */
	to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
	to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
	remain_items = vn->vn_nr_item;

	/* how many items remain in S[0] after shiftings to neighbors */
	remain_items -= (to_left + to_right);

	if (remain_items < 1) {
		/* all content of node can be shifted to neighbors */
		set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
			       NULL, -1, -1);
		return 1;
	}

	/* with more than one whole item left, or if a boundary item
	   cannot be split in either direction, S[0] stays */
	if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
		/* S[0] is not removable */
		return 0;

	/* check, whether we can divide 1 remaining item between neighbors */

	/* get size of remaining item (in item units) */
	size = op_unit_num(&(vn->vn_vi[to_left]));

	if (tb->lbytes + tb->rbytes >= size) {
		/* yes: first tb->lbytes units go left, the rest right */
		set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
			       tb->lbytes, -1);
		return 1;
	}

	return 0;
}
654 
/* check whether L, S, R can be joined in one node.  lfree/rfree are
   the free space of the left/right neighbor.  Returns 1 (and records
   the decision via set_parameters) when the combined contents fit. */
static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
{
	struct virtual_node *vn = tb->tb_vn;
	int ih_size;		/* item-header bytes saved by boundary merges */
	struct buffer_head *S0;

	S0 = PATH_H_PBUFFER(tb->tb_path, 0);

	ih_size = 0;
	if (vn->vn_nr_item) {
		/* each boundary item that merges into a neighbor saves
		   its item header */
		if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
			ih_size += IH_SIZE;

		if (vn->vn_vi[vn->vn_nr_item - 1].
		    vi_type & VI_TYPE_RIGHT_MERGEABLE)
			ih_size += IH_SIZE;
	} else {
		/* there was only one item and it will be deleted */
		struct item_head *ih;

		RFALSE(B_NR_ITEMS(S0) != 1,
		       "vs-8125: item number must be 1: it is %d",
		       B_NR_ITEMS(S0));

		ih = B_N_PITEM_HEAD(S0, 0);
		/* the right delimiting key matching our item's short key
		   means the right neighbor continues the same object */
		if (tb->CFR[0]
		    && !comp_short_le_keys(&(ih->ih_key),
					   B_N_PDELIM_KEY(tb->CFR[0],
							  tb->rkey[0])))
			if (is_direntry_le_ih(ih)) {
				/* Directory must be in correct state here: that is
				   somewhere at the left side should exist first directory
				   item. But the item being deleted can not be that first
				   one because its right neighbor is item of the same
				   directory. (But first item always gets deleted in last
				   turn). So, neighbors of deleted item can be merged, so
				   we can save ih_size */
				ih_size = IH_SIZE;

				/* we might check that left neighbor exists and is of the
				   same directory */
				RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
				       "vs-8130: first directory item can not be removed until directory is not empty");
			}

	}

	/* everything fits into the two neighbors: S[0] can go away */
	if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
		set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
		PROC_INFO_INC(tb->tb_sb, leaves_removable);
		return 1;
	}
	return 0;

}
711 
/* when we do not split item, lnum and rnum are numbers of entire items */

/* Record a shift-to-left decision via set_parameters.  On internal
   levels (h != 0) an even redistribution of pointers is computed; on
   the leaf level either a flow shift (lset == LEFT_SHIFT_FLOW, the
   boundary item is split and tb->lbytes units move) or a whole-item
   shift is recorded.  Relies on Sh, vn, lpar, lset, lnver, snum012
   and tb being defined in the calling scope. */
#define SET_PAR_SHIFT_LEFT \
if (h)\
{\
   int to_l;\
   \
   to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
	      (MAX_NR_KEY(Sh) + 1 - lpar);\
	      \
	      set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
}\
else \
{\
   if (lset==LEFT_SHIFT_FLOW)\
     set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
		     tb->lbytes, -1);\
   else\
     set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
		     -1, -1);\
}

/* Mirror of SET_PAR_SHIFT_LEFT for shifting to the right neighbor.
   Relies on Sh, vn, rpar, rset, rnver, snum012 and tb in the calling
   scope. */
#define SET_PAR_SHIFT_RIGHT \
if (h)\
{\
   int to_r;\
   \
   to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
   \
   set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
}\
else \
{\
   if (rset==RIGHT_SHIFT_FLOW)\
     set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
		  -1, tb->rbytes);\
   else\
     set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
		  -1, -1);\
}
751 
752 static void free_buffers_in_tb(struct tree_balance *p_s_tb)
753 {
754 	int n_counter;
755 
756 	decrement_counters_in_path(p_s_tb->tb_path);
757 
758 	for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) {
759 		decrement_bcount(p_s_tb->L[n_counter]);
760 		p_s_tb->L[n_counter] = NULL;
761 		decrement_bcount(p_s_tb->R[n_counter]);
762 		p_s_tb->R[n_counter] = NULL;
763 		decrement_bcount(p_s_tb->FL[n_counter]);
764 		p_s_tb->FL[n_counter] = NULL;
765 		decrement_bcount(p_s_tb->FR[n_counter]);
766 		p_s_tb->FR[n_counter] = NULL;
767 		decrement_bcount(p_s_tb->CFL[n_counter]);
768 		p_s_tb->CFL[n_counter] = NULL;
769 		decrement_bcount(p_s_tb->CFR[n_counter]);
770 		p_s_tb->CFR[n_counter] = NULL;
771 	}
772 }
773 
/* Get new buffers for storing new nodes that are created while balancing.
 * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
 *	        CARRY_ON - schedule didn't occur while the function worked;
 *	        NO_DISK_SPACE - no disk space.
 */
/* The function is NOT SCHEDULE-SAFE! */
static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_new_bh,
	    *p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h);
	b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
	int n_counter, n_number_of_freeblk, n_amount_needed,	/* number of needed empty blocks */
	 n_retval = CARRY_ON;
	struct super_block *p_s_sb = p_s_tb->tb_sb;

	/* number_of_freeblk is the number of empty blocks which have been
	   acquired for use by the balancing algorithm minus the number of
	   empty blocks used in the previous levels of the analysis,
	   number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
	   after empty blocks are acquired, and the balancing analysis is
	   then restarted, amount_needed is the number needed by this level
	   (n_h) of the balancing analysis.

	   Note that for systems with many processes writing, it would be
	   more layout optimal to calculate the total number needed by all
	   levels and then to run reiserfs_new_blocks to get all of them at once.  */

	/* Initiate number_of_freeblk to the amount acquired prior to the restart of
	   the analysis or 0 if not restarted, then subtract the amount needed
	   by all of the levels of the tree below n_h. */
	/* blknum includes S[n_h], so we subtract 1 in this calculation */
	for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum;
	     n_counter < n_h; n_counter++)
		n_number_of_freeblk -=
		    (p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] -
						   1) : 0;

	/* Allocate missing empty blocks. */
	/* if p_s_Sh == 0  then we are getting a new root */
	n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1;
	/*  Amount_needed = the amount that we need more than the amount that we have. */
	if (n_amount_needed > n_number_of_freeblk)
		n_amount_needed -= n_number_of_freeblk;
	else			/* If we have enough already then there is nothing to do. */
		return CARRY_ON;

	/* No need to check quota - is not allocated for blocks used for formatted nodes */
	if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs,
				       n_amount_needed) == NO_DISK_SPACE)
		return NO_DISK_SPACE;

	/* for each blocknumber we just got, get a buffer and stick it on FEB */
	for (p_n_blocknr = a_n_blocknrs, n_counter = 0;
	     n_counter < n_amount_needed; p_n_blocknr++, n_counter++) {

		RFALSE(!*p_n_blocknr,
		       "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");

		p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr);
		/* a freshly allocated block must not already carry data
		   or be tied to the journal */
		RFALSE(buffer_dirty(p_s_new_bh) ||
		       buffer_journaled(p_s_new_bh) ||
		       buffer_journal_dirty(p_s_new_bh),
		       "PAP-8140: journlaled or dirty buffer %b for the new block",
		       p_s_new_bh);

		/* Put empty buffers into the array. */
		RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum],
		       "PAP-8141: busy slot for new buffer");

		set_buffer_journal_new(p_s_new_bh);
		p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
	}

	/* block allocation may have blocked; if the tree changed under
	   us the whole analysis must be redone */
	if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb))
		n_retval = REPEAT_SEARCH;

	return n_retval;
}
852 
853 /* Get free space of the left neighbor, which is stored in the parent
854  * node of the left neighbor.  */
855 static int get_lfree(struct tree_balance *tb, int h)
856 {
857 	struct buffer_head *l, *f;
858 	int order;
859 
860 	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0)
861 		return 0;
862 
863 	if (f == l)
864 		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
865 	else {
866 		order = B_NR_ITEMS(l);
867 		f = l;
868 	}
869 
870 	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
871 }
872 
873 /* Get free space of the right neighbor,
874  * which is stored in the parent node of the right neighbor.
875  */
876 static int get_rfree(struct tree_balance *tb, int h)
877 {
878 	struct buffer_head *r, *f;
879 	int order;
880 
881 	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0)
882 		return 0;
883 
884 	if (f == r)
885 		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
886 	else {
887 		order = 0;
888 		f = r;
889 	}
890 
891 	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
892 
893 }
894 
/* Check whether left neighbor is in memory.  Probes the buffer cache
   for the left neighbor's block without reading it from disk; returns
   1 if a cached buffer exists, 0 otherwise. */
static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_father, *left;
	struct super_block *p_s_sb = p_s_tb->tb_sb;
	b_blocknr_t n_left_neighbor_blocknr;
	int n_left_neighbor_position;

	if (!p_s_tb->FL[n_h])	/* Father of the left neighbor does not exist. */
		return 0;

	/* Calculate father of the node to be balanced. */
	p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);

	RFALSE(!p_s_father ||
	       !B_IS_IN_TREE(p_s_father) ||
	       !B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
	       !buffer_uptodate(p_s_father) ||
	       !buffer_uptodate(p_s_tb->FL[n_h]),
	       "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
	       p_s_father, p_s_tb->FL[n_h]);

	/* Get position of the pointer to the left neighbor into the left father:
	   when S[h] and L[h] share a father, it is the position left of S[h];
	   otherwise L[h] is the rightmost child of FL[h]. */
	n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ?
	    p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]);
	/* Get left neighbor block number. */
	n_left_neighbor_blocknr =
	    B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
	/* Look for the left neighbor in the cache. */
	if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) {

		RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
		       "vs-8170: left neighbor (%b %z) is not in the tree",
		       left, left);
		/* drop the reference taken by sb_find_get_block */
		put_bh(left);
		return 1;
	}

	return 0;
}
935 
936 #define LEFT_PARENTS  'l'
937 #define RIGHT_PARENTS 'r'
938 
/* Replace p_s_key in place with the key immediately preceding it in key
 * order.  How a key is decremented depends on the item type encoded in
 * the key, so this dispatches through the item_ops table. */
static void decrement_key(struct cpu_key *p_s_key)
{
	// call item specific function for this key
	item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key);
}
944 
/* Calculate far left/right parent of the left/right neighbor of the current node, that
 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
 * Calculate left/right common parent of the current node and L[h]/R[h].
 * Calculate left/right delimiting key position.
 * Returns:	REPEAT_SEARCH - the path is no longer valid, or the tree
 *			changed while we slept; the caller must research;
 *		IO_ERROR      - search_by_key failed (its path is released);
 *		CARRY_ON      - success; *pp_s_father and *pp_s_com_father
 *			are set (both NULL when the current node is in the
 *			root and hence has no far parent).
 */
static int get_far_parent(struct tree_balance *p_s_tb,
			  int n_h,
			  struct buffer_head **pp_s_father,
			  struct buffer_head **pp_s_com_father, char c_lr_par)
{
	struct buffer_head *p_s_parent;
	INITIALIZE_PATH(s_path_to_neighbor_father);
	struct treepath *p_s_path = p_s_tb->tb_path;
	struct cpu_key s_lr_father_key;
	int n_counter,
	    n_position = INT_MAX,
	    n_first_last_position = 0,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);

	/* Starting from F[n_h] go upwards in the tree, and look for the common
	   ancestor of F[n_h], and its neighbor l/r, that should be obtained. */

	n_counter = n_path_offset;

	RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET,
	       "PAP-8180: invalid path length");

	for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) {
		/* Check whether parent of the current buffer in the path is really parent in the tree. */
		if (!B_IS_IN_TREE
		    (p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)))
			return REPEAT_SEARCH;
		/* Check whether position in the parent is correct. */
		if ((n_position =
		     PATH_OFFSET_POSITION(p_s_path,
					  n_counter - 1)) >
		    B_NR_ITEMS(p_s_parent))
			return REPEAT_SEARCH;
		/* Check whether parent at the path really points to the child. */
		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
		    PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr)
			return REPEAT_SEARCH;
		/* Return delimiting key if position in the parent is not equal to first/last one.
		   (For RIGHT_PARENTS "last" means the parent's item count;
		   for LEFT_PARENTS "first" is position 0.) */
		if (c_lr_par == RIGHT_PARENTS)
			n_first_last_position = B_NR_ITEMS(p_s_parent);
		if (n_position != n_first_last_position) {
			/* Found the common ancestor; take a reference on it. */
			*pp_s_com_father = p_s_parent;
			get_bh(*pp_s_com_father);
			/*(*pp_s_com_father = p_s_parent)->b_count++; */
			break;
		}
	}

	/* if we are in the root of the tree, then there is no common father */
	if (n_counter == FIRST_PATH_ELEMENT_OFFSET) {
		/* Check whether first buffer in the path is the root of the tree. */
		if (PATH_OFFSET_PBUFFER
		    (p_s_tb->tb_path,
		     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
		    SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
			*pp_s_father = *pp_s_com_father = NULL;
			return CARRY_ON;
		}
		return REPEAT_SEARCH;
	}

	RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
	       "PAP-8185: (%b %z) level too small",
	       *pp_s_com_father, *pp_s_com_father);

	/* Check whether the common parent is locked.  Waiting may sleep,
	   so recheck that the filesystem did not change underneath us. */

	if (buffer_locked(*pp_s_com_father)) {
		__wait_on_buffer(*pp_s_com_father);
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			decrement_bcount(*pp_s_com_father);
			return REPEAT_SEARCH;
		}
	}

	/* So, we got common parent of the current node and its left/right neighbor.
	   Now we are getting the parent of the left/right neighbor. */

	/* Form key to get parent of the left/right neighbor.  As a side
	   effect this records the delimiting key position in
	   lkey[n_h - 1] / rkey[n_h - 1]. */
	le_key2cpu_key(&s_lr_father_key,
		       B_N_PDELIM_KEY(*pp_s_com_father,
				      (c_lr_par ==
				       LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] =
							n_position -
							1) : (p_s_tb->rkey[n_h -
									   1] =
							      n_position)));

	/* The left delimiting key addresses our own subtree; decrement it
	   so that the search descends into the left neighbor instead. */
	if (c_lr_par == LEFT_PARENTS)
		decrement_key(&s_lr_father_key);

	if (search_by_key
	    (p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
	     n_h + 1) == IO_ERROR)
		// path is released
		return IO_ERROR;

	if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
		decrement_counters_in_path(&s_path_to_neighbor_father);
		decrement_bcount(*pp_s_com_father);
		return REPEAT_SEARCH;
	}

	*pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);

	RFALSE(B_LEVEL(*pp_s_father) != n_h + 1,
	       "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
	RFALSE(s_path_to_neighbor_father.path_length <
	       FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");

	/* Drop every buffer on the auxiliary path except its last element,
	   the neighbor's father, whose reference we hand to the caller. */
	s_path_to_neighbor_father.path_length--;
	decrement_counters_in_path(&s_path_to_neighbor_father);
	return CARRY_ON;
}
1067 
/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
 * Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset].
 * Returns:	CARRY_ON - all parents obtained and stored in tb;
 *		REPEAT_SEARCH / IO_ERROR - propagated from get_far_parent
 *			when the tree changed under us or a lookup failed.
 */
static int get_parents(struct tree_balance *p_s_tb, int n_h)
{
	struct treepath *p_s_path = p_s_tb->tb_path;
	int n_position,
	    n_ret_value,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
	struct buffer_head *p_s_curf, *p_s_curcf;

	/* Current node is the root of the tree or will be root of the tree */
	if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
		/* The root can not have parents.
		   Release nodes which previously were obtained as parents of the current node neighbors. */
		decrement_bcount(p_s_tb->FL[n_h]);
		decrement_bcount(p_s_tb->CFL[n_h]);
		decrement_bcount(p_s_tb->FR[n_h]);
		decrement_bcount(p_s_tb->CFR[n_h]);
		p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] =
		    p_s_tb->CFR[n_h] = NULL;
		return CARRY_ON;
	}

	/* Get parent FL[n_path_offset] of L[n_path_offset]. */
	if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
		/* Current node is not the first child of its parent. */
		/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
		p_s_curf = p_s_curcf =
		    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
		/* Two references: one for FL[n_h], one for CFL[n_h]. */
		get_bh(p_s_curf);
		get_bh(p_s_curf);
		p_s_tb->lkey[n_h] = n_position - 1;
	} else {
		/* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
		   Calculate current common parent of L[n_path_offset] and the current node. Note that
		   CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
		   Calculate lkey[n_path_offset]. */
		if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
						  &p_s_curcf,
						  LEFT_PARENTS)) != CARRY_ON)
			return n_ret_value;
	}

	decrement_bcount(p_s_tb->FL[n_h]);
	p_s_tb->FL[n_h] = p_s_curf;	/* New initialization of FL[n_h]. */
	decrement_bcount(p_s_tb->CFL[n_h]);
	p_s_tb->CFL[n_h] = p_s_curcf;	/* New initialization of CFL[n_h]. */

	RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
	       (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
	       "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);

/* Get parent FR[n_h] of R[n_h]. */

/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */
	if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) {
/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h].
   Calculate current common parent of R[n_h] and current node. Note that CFR[n_h]
   not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */
		if ((n_ret_value =
		     get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf,
				    RIGHT_PARENTS)) != CARRY_ON)
			return n_ret_value;
	} else {
/* Current node is not the last child of its parent F[n_h]. */
		/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
		p_s_curf = p_s_curcf =
		    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
		/* Two references: one for FR[n_h], one for CFR[n_h]. */
		get_bh(p_s_curf);
		get_bh(p_s_curf);
		p_s_tb->rkey[n_h] = n_position;
	}

	decrement_bcount(p_s_tb->FR[n_h]);
	p_s_tb->FR[n_h] = p_s_curf;	/* New initialization of FR[n_path_offset]. */

	decrement_bcount(p_s_tb->CFR[n_h]);
	p_s_tb->CFR[n_h] = p_s_curcf;	/* New initialization of CFR[n_path_offset]. */

	RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
	       (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
	       "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);

	return CARRY_ON;
}
1158 
1159 /* it is possible to remove node as result of shiftings to
1160    neighbors even when we insert or paste item. */
1161 static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1162 				      struct tree_balance *tb, int h)
1163 {
1164 	struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
1165 	int levbytes = tb->insert_size[h];
1166 	struct item_head *ih;
1167 	struct reiserfs_key *r_key = NULL;
1168 
1169 	ih = B_N_PITEM_HEAD(Sh, 0);
1170 	if (tb->CFR[h])
1171 		r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);
1172 
1173 	if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
1174 	    /* shifting may merge items which might save space */
1175 	    -
1176 	    ((!h
1177 	      && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
1178 	    -
1179 	    ((!h && r_key
1180 	      && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
1181 	    + ((h) ? KEY_SIZE : 0)) {
1182 		/* node can not be removed */
1183 		if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
1184 			if (!h)
1185 				tb->s0num =
1186 				    B_NR_ITEMS(Sh) +
1187 				    ((mode == M_INSERT) ? 1 : 0);
1188 			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1189 			return NO_BALANCING_NEEDED;
1190 		}
1191 	}
1192 	PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
1193 	return !NO_BALANCING_NEEDED;
1194 }
1195 
/* Check whether current node S[h] is balanced when increasing its size by
 * Inserting or Pasting.
 * Calculate parameters for balancing for current level h.
 * Parameters:
 *	tb	tree_balance structure;
 *	h	current level of the node;
 * (the operation mode and insertion position travel in the virtual node
 *  tb->tb_vn rather than as explicit arguments)
 * Returns:	NO_BALANCING_NEEDED - S[h] absorbs the change as is;
 *		CARRY_ON            - balancing parameters for level h set,
 *				      continue with higher levels;
 *		REPEAT_SEARCH / NO_DISK_SPACE - propagated from
 *				      get_empty_nodes / get_parents.
 */
/* ip means Inserting or Pasting */
static int ip_check_balance(struct tree_balance *tb, int h)
{
	struct virtual_node *vn = tb->tb_vn;
	int levbytes,		/* Number of bytes that must be inserted into (value
				   is negative if bytes are deleted) buffer which
				   contains node being balanced.  The mnemonic is
				   that the attempted change in node space used level
				   is levbytes bytes. */
	 n_ret_value;

	int lfree, sfree, rfree /* free space in L, S and R */ ;

	/* nver is short for number of vertixes, and lnver is the number if
	   we shift to the left, rnver is the number if we shift to the
	   right, and lrnver is the number if we shift in both directions.
	   The goal is to minimize first the number of vertixes, and second,
	   the number of vertixes whose contents are changed by shifting,
	   and third the number of uncached vertixes whose contents are
	   changed by shifting and must be read from disk.  */
	int nver, lnver, rnver, lrnver;

	/* used at leaf level only, S0 = S[0] is the node being balanced,
	   sInum [ I = 0,1,2 ] is the number of items that will
	   remain in node SI after balancing.  S1 and S2 are new
	   nodes that might be created. */

	/* we perform 8 calls to get_num_ver().  For each call we calculate five parameters.
	   where 4th parameter is s1bytes and 5th - s2bytes
	 */
	short snum012[40] = { 0, };	/* s0num, s1num, s2num for 8 cases
					   0,1 - do not shift and do not shift but bottle
					   2 - shift only whole item to left
					   3 - shift to left and bottle as much as possible
					   4,5 - shift to right (whole items and as much as possible
					   6,7 - shift to both directions (whole items and as much as possible)
					 */

	/* Sh is the node whose balance is currently being checked */
	struct buffer_head *Sh;

	Sh = PATH_H_PBUFFER(tb->tb_path, h);
	levbytes = tb->insert_size[h];

	/* Calculate balance parameters for creating new root. */
	if (!Sh) {
		if (!h)
			reiserfs_panic(tb->tb_sb,
				       "vs-8210: ip_check_balance: S[0] can not be 0");
		switch (n_ret_value = get_empty_nodes(tb, h)) {
		case CARRY_ON:
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;	/* no balancing for higher levels needed */

		case NO_DISK_SPACE:
		case REPEAT_SEARCH:
			return n_ret_value;
		default:
			reiserfs_panic(tb->tb_sb,
				       "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes");
		}
	}

	if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)	/* get parents of S[h] neighbors. */
		return n_ret_value;

	sfree = B_FREE_SPACE(Sh);

	/* get free space of neighbors */
	rfree = get_rfree(tb, h);
	lfree = get_lfree(tb, h);

	if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
	    NO_BALANCING_NEEDED)
		/* and new item fits into node S[h] without any shifting */
		return NO_BALANCING_NEEDED;

	create_virtual_node(tb, h);

	/*
	   determine maximal number of items we can shift to the left neighbor (in tb structure)
	   and the maximal number of bytes that can flow to the left neighbor
	   from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
	 */
	check_left(tb, h, lfree);

	/*
	   determine maximal number of items we can shift to the right neighbor (in tb structure)
	   and the maximal number of bytes that can flow to the right neighbor
	   from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
	 */
	check_right(tb, h, rfree);

	/* all contents of internal node S[h] can be moved into its
	   neighbors, S[h] will be removed after balancing */
	if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
		int to_r;

		/* Since we are working on internal nodes, and our internal
		   nodes have fixed size entries, then we can balance by the
		   number of items rather than the space they consume.  In this
		   routine we set the left node equal to the right node,
		   allowing a difference of less than or equal to 1 child
		   pointer. */
		to_r =
		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
						tb->rnum[h]);
		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
			       -1, -1);
		return CARRY_ON;
	}

	/* this checks balance condition, that any two neighboring nodes can not fit in one node */
	RFALSE(h &&
	       (tb->lnum[h] >= vn->vn_nr_item + 1 ||
		tb->rnum[h] >= vn->vn_nr_item + 1),
	       "vs-8220: tree is not balanced on internal level");
	RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
		      (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
	       "vs-8225: tree is not balanced on leaf level");

	/* all contents of S[0] can be moved into its neighbors
	   S[0] will be removed after balancing. */
	if (!h && is_leaf_removable(tb))
		return CARRY_ON;

	/* why do we perform this check here rather than earlier??
	   Answer: we can win 1 node in some cases above. Moreover we
	   checked it above, when we checked, that S[0] is not removable
	   in principle */
	if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
		if (!h)
			tb->s0num = vn->vn_nr_item;
		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
		return NO_BALANCING_NEEDED;
	}

	{
		int lpar, rpar, nset, lset, rset, lrset;
		/*
		 * regular overflowing of the node
		 */

		/* get_num_ver works in 2 modes (FLOW & NO_FLOW)
		   lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
		   nset, lset, rset, lrset - shows, whether flowing items give better packing
		 */
#define FLOW 1
#define NO_FLOW 0		/* do not any splitting */

		/* we choose one of the following; each constant is also the
		   offset of that case's s012 results inside snum012[] */
#define NOTHING_SHIFT_NO_FLOW	0
#define NOTHING_SHIFT_FLOW	5
#define LEFT_SHIFT_NO_FLOW	10
#define LEFT_SHIFT_FLOW		15
#define RIGHT_SHIFT_NO_FLOW	20
#define RIGHT_SHIFT_FLOW	25
#define LR_SHIFT_NO_FLOW	30
#define LR_SHIFT_FLOW		35

		lpar = tb->lnum[h];
		rpar = tb->rnum[h];

		/* calculate number of blocks S[h] must be split into when
		   nothing is shifted to the neighbors,
		   as well as number of items in each part of the split node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
		nset = NOTHING_SHIFT_NO_FLOW;
		nver = get_num_ver(vn->vn_mode, tb, h,
				   0, -1, h ? vn->vn_nr_item : 0, -1,
				   snum012, NO_FLOW);

		if (!h) {
			int nver1;

			/* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
			nver1 = get_num_ver(vn->vn_mode, tb, h,
					    0, -1, 0, -1,
					    snum012 + NOTHING_SHIFT_FLOW, FLOW);
			if (nver > nver1)
				nset = NOTHING_SHIFT_FLOW, nver = nver1;
		}

		/* calculate number of blocks S[h] must be split into when
		   l_shift_num first items and l_shift_bytes of the right most
		   liquid item to be shifted are shifted to the left neighbor,
		   as well as number of items in each part of the splitted node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
		 */
		lset = LEFT_SHIFT_NO_FLOW;
		lnver = get_num_ver(vn->vn_mode, tb, h,
				    lpar - ((h || tb->lbytes == -1) ? 0 : 1),
				    -1, h ? vn->vn_nr_item : 0, -1,
				    snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int lnver1;

			lnver1 = get_num_ver(vn->vn_mode, tb, h,
					     lpar -
					     ((tb->lbytes != -1) ? 1 : 0),
					     tb->lbytes, 0, -1,
					     snum012 + LEFT_SHIFT_FLOW, FLOW);
			if (lnver > lnver1)
				lset = LEFT_SHIFT_FLOW, lnver = lnver1;
		}

		/* calculate number of blocks S[h] must be split into when
		   r_shift_num first items and r_shift_bytes of the left most
		   liquid item to be shifted are shifted to the right neighbor,
		   as well as number of items in each part of the splitted node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
		 */
		rset = RIGHT_SHIFT_NO_FLOW;
		rnver = get_num_ver(vn->vn_mode, tb, h,
				    0, -1,
				    h ? (vn->vn_nr_item - rpar) : (rpar -
								   ((tb->
								     rbytes !=
								     -1) ? 1 :
								    0)), -1,
				    snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int rnver1;

			rnver1 = get_num_ver(vn->vn_mode, tb, h,
					     0, -1,
					     (rpar -
					      ((tb->rbytes != -1) ? 1 : 0)),
					     tb->rbytes,
					     snum012 + RIGHT_SHIFT_FLOW, FLOW);

			if (rnver > rnver1)
				rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
		}

		/* calculate number of blocks S[h] must be split into when
		   items are shifted in both directions,
		   as well as number of items in each part of the splitted node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
		 */
		lrset = LR_SHIFT_NO_FLOW;
		lrnver = get_num_ver(vn->vn_mode, tb, h,
				     lpar - ((h || tb->lbytes == -1) ? 0 : 1),
				     -1,
				     h ? (vn->vn_nr_item - rpar) : (rpar -
								    ((tb->
								      rbytes !=
								      -1) ? 1 :
								     0)), -1,
				     snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int lrnver1;

			lrnver1 = get_num_ver(vn->vn_mode, tb, h,
					      lpar -
					      ((tb->lbytes != -1) ? 1 : 0),
					      tb->lbytes,
					      (rpar -
					       ((tb->rbytes != -1) ? 1 : 0)),
					      tb->rbytes,
					      snum012 + LR_SHIFT_FLOW, FLOW);
			if (lrnver > lrnver1)
				lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
		}

		/* Our general shifting strategy is:
		   1) to minimized number of new nodes;
		   2) to minimized number of neighbors involved in shifting;
		   3) to minimized number of disk reads; */

		/* we can win TWO or ONE nodes by shifting in both directions */
		if (lrnver < lnver && lrnver < rnver) {
			RFALSE(h &&
			       (tb->lnum[h] != 1 ||
				tb->rnum[h] != 1 ||
				lrnver != 1 || rnver != 2 || lnver != 2
				|| h != 1), "vs-8230: bad h");
			if (lrset == LR_SHIFT_FLOW)
				set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
					       lrnver, snum012 + lrset,
					       tb->lbytes, tb->rbytes);
			else
				set_parameters(tb, h,
					       tb->lnum[h] -
					       ((tb->lbytes == -1) ? 0 : 1),
					       tb->rnum[h] -
					       ((tb->rbytes == -1) ? 0 : 1),
					       lrnver, snum012 + lrset, -1, -1);

			return CARRY_ON;
		}

		/* if shifting doesn't lead to better packing then don't shift */
		if (nver == lrnver) {
			set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
				       -1);
			return CARRY_ON;
		}

		/* now we know that for better packing shifting in only one
		   direction either to the left or to the right is required */

		/*  if shifting to the left is better than shifting to the right */
		if (lnver < rnver) {
			SET_PAR_SHIFT_LEFT;
			return CARRY_ON;
		}

		/* if shifting to the right is better than shifting to the left */
		if (lnver > rnver) {
			SET_PAR_SHIFT_RIGHT;
			return CARRY_ON;
		}

		/* now shifting in either direction gives the same number
		   of nodes and we can make use of the cached neighbors */
		if (is_left_neighbor_in_cache(tb, h)) {
			SET_PAR_SHIFT_LEFT;
			return CARRY_ON;
		}

		/* shift to the right independently on whether the right neighbor in cache or not */
		SET_PAR_SHIFT_RIGHT;
		return CARRY_ON;
	}
}
1536 
1537 /* Check whether current node S[h] is balanced when Decreasing its size by
1538  * Deleting or Cutting for INTERNAL node of S+tree.
1539  * Calculate parameters for balancing for current level h.
1540  * Parameters:
1541  *	tb	tree_balance structure;
1542  *	h	current level of the node;
1543  *	inum	item number in S[h];
1544  *	mode	i - insert, p - paste;
1545  * Returns:	1 - schedule occurred;
1546  *	        0 - balancing for higher levels needed;
1547  *	       -1 - no balancing for higher levels needed;
1548  *	       -2 - no disk space.
1549  *
1550  * Note: Items of internal nodes have fixed size, so the balance condition for
1551  * the internal part of S+tree is as for the B-trees.
1552  */
1553 static int dc_check_balance_internal(struct tree_balance *tb, int h)
1554 {
1555 	struct virtual_node *vn = tb->tb_vn;
1556 
1557 	/* Sh is the node whose balance is currently being checked,
1558 	   and Fh is its father.  */
1559 	struct buffer_head *Sh, *Fh;
1560 	int maxsize, n_ret_value;
1561 	int lfree, rfree /* free space in L and R */ ;
1562 
1563 	Sh = PATH_H_PBUFFER(tb->tb_path, h);
1564 	Fh = PATH_H_PPARENT(tb->tb_path, h);
1565 
1566 	maxsize = MAX_CHILD_SIZE(Sh);
1567 
1568 /*   using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
1569 /*   new_nr_item = number of items node would have if operation is */
1570 /* 	performed without balancing (new_nr_item); */
1571 	create_virtual_node(tb, h);
1572 
1573 	if (!Fh) {		/* S[h] is the root. */
1574 		if (vn->vn_nr_item > 0) {
1575 			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1576 			return NO_BALANCING_NEEDED;	/* no balancing for higher levels needed */
1577 		}
1578 		/* new_nr_item == 0.
1579 		 * Current root will be deleted resulting in
1580 		 * decrementing the tree height. */
1581 		set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1582 		return CARRY_ON;
1583 	}
1584 
1585 	if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1586 		return n_ret_value;
1587 
1588 	/* get free space of neighbors */
1589 	rfree = get_rfree(tb, h);
1590 	lfree = get_lfree(tb, h);
1591 
1592 	/* determine maximal number of items we can fit into neighbors */
1593 	check_left(tb, h, lfree);
1594 	check_right(tb, h, rfree);
1595 
1596 	if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) {	/* Balance condition for the internal node is valid.
1597 						 * In this case we balance only if it leads to better packing. */
1598 		if (vn->vn_nr_item == MIN_NR_KEY(Sh)) {	/* Here we join S[h] with one of its neighbors,
1599 							 * which is impossible with greater values of new_nr_item. */
1600 			if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1601 				/* All contents of S[h] can be moved to L[h]. */
1602 				int n;
1603 				int order_L;
1604 
1605 				order_L =
1606 				    ((n =
1607 				      PATH_H_B_ITEM_ORDER(tb->tb_path,
1608 							  h)) ==
1609 				     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1610 				n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
1611 				    (DC_SIZE + KEY_SIZE);
1612 				set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
1613 					       -1);
1614 				return CARRY_ON;
1615 			}
1616 
1617 			if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1618 				/* All contents of S[h] can be moved to R[h]. */
1619 				int n;
1620 				int order_R;
1621 
1622 				order_R =
1623 				    ((n =
1624 				      PATH_H_B_ITEM_ORDER(tb->tb_path,
1625 							  h)) ==
1626 				     B_NR_ITEMS(Fh)) ? 0 : n + 1;
1627 				n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
1628 				    (DC_SIZE + KEY_SIZE);
1629 				set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
1630 					       -1);
1631 				return CARRY_ON;
1632 			}
1633 		}
1634 
1635 		if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1636 			/* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1637 			int to_r;
1638 
1639 			to_r =
1640 			    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
1641 			     tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
1642 			    (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1643 			set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
1644 				       0, NULL, -1, -1);
1645 			return CARRY_ON;
1646 		}
1647 
1648 		/* Balancing does not lead to better packing. */
1649 		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1650 		return NO_BALANCING_NEEDED;
1651 	}
1652 
1653 	/* Current node contain insufficient number of items. Balancing is required. */
1654 	/* Check whether we can merge S[h] with left neighbor. */
1655 	if (tb->lnum[h] >= vn->vn_nr_item + 1)
1656 		if (is_left_neighbor_in_cache(tb, h)
1657 		    || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
1658 			int n;
1659 			int order_L;
1660 
1661 			order_L =
1662 			    ((n =
1663 			      PATH_H_B_ITEM_ORDER(tb->tb_path,
1664 						  h)) ==
1665 			     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1666 			n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
1667 								      KEY_SIZE);
1668 			set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
1669 			return CARRY_ON;
1670 		}
1671 
1672 	/* Check whether we can merge S[h] with right neighbor. */
1673 	if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1674 		int n;
1675 		int order_R;
1676 
1677 		order_R =
1678 		    ((n =
1679 		      PATH_H_B_ITEM_ORDER(tb->tb_path,
1680 					  h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1681 		n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
1682 							      KEY_SIZE);
1683 		set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
1684 		return CARRY_ON;
1685 	}
1686 
1687 	/* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1688 	if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1689 		int to_r;
1690 
1691 		to_r =
1692 		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1693 		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1694 						tb->rnum[h]);
1695 		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1696 			       -1, -1);
1697 		return CARRY_ON;
1698 	}
1699 
1700 	/* For internal nodes try to borrow item from a neighbor */
1701 	RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1702 
1703 	/* Borrow one or two items from caching neighbor */
1704 	if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
1705 		int from_l;
1706 
1707 		from_l =
1708 		    (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
1709 		     1) / 2 - (vn->vn_nr_item + 1);
1710 		set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
1711 		return CARRY_ON;
1712 	}
1713 
1714 	set_parameters(tb, h, 0,
1715 		       -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
1716 			  1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
1717 	return CARRY_ON;
1718 }
1719 
1720 /* Check whether current node S[h] is balanced when Decreasing its size by
1721  * Deleting or Truncating for LEAF node of S+tree.
1722  * Calculate parameters for balancing for current level h.
1723  * Parameters:
1724  *	tb	tree_balance structure;
1725  *	h	current level of the node;
1726  *	inum	item number in S[h];
1727  *	mode	i - insert, p - paste;
1728  * Returns:	1 - schedule occurred;
1729  *	        0 - balancing for higher levels needed;
1730  *	       -1 - no balancing for higher levels needed;
1731  *	       -2 - no disk space.
1732  */
static int dc_check_balance_leaf(struct tree_balance *tb, int h)
{
	struct virtual_node *vn = tb->tb_vn;

	/* Number of bytes that must be deleted from
	   (value is negative if bytes are deleted) buffer which
	   contains node being balanced.  The mnemonic is that the
	   attempted change in node space used level is levbytes bytes. */
	int levbytes;
	/* the maximal item size */
	int maxsize, n_ret_value;
	/* S0 is the node whose balance is currently being checked,
	   and F0 is its father.  */
	struct buffer_head *S0, *F0;
	int lfree, rfree /* free space in L and R */ ;

	/* NOTE: h is expected to be 0 here — dc_check_balance() only
	   dispatches to this function for the leaf level. */
	S0 = PATH_H_PBUFFER(tb->tb_path, 0);
	F0 = PATH_H_PPARENT(tb->tb_path, 0);

	levbytes = tb->insert_size[h];

	maxsize = MAX_CHILD_SIZE(S0);	/* maximal possible size of an item */

	if (!F0) {		/* S[0] is the root now. */

		/* The root may shrink but must not become empty. */
		RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
		       "vs-8240: attempt to create empty buffer tree");

		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
		return NO_BALANCING_NEEDED;
	}

	/* May schedule (REPEAT_SEARCH) or fail (IO_ERROR); propagate. */
	if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
		return n_ret_value;

	/* get free space of neighbors */
	rfree = get_rfree(tb, h);
	lfree = get_lfree(tb, h);

	create_virtual_node(tb, h);

	/* if all 3 leaves can be merged into one, set parameters and return */
	if (are_leaves_removable(tb, lfree, rfree))
		return CARRY_ON;

	/* determine maximal number of items we can shift to the left/right  neighbor
	   and the maximal number of bytes that can flow to the left/right neighbor
	   from the left/right most liquid item that cannot be shifted from S[0] entirely
	 */
	check_left(tb, h, lfree);
	check_right(tb, h, rfree);

	/* check whether we can merge S with left neighbor. */
	if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
		/* Prefer the left neighbor when it is already cached, when S
		   cannot be fully merged into R, or when R does not exist. */
		if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) ||	/* S can not be merged with R */
		    !tb->FR[h]) {

			RFALSE(!tb->FL[h],
			       "vs-8245: dc_check_balance_leaf: FL[h] must exist");

			/* set parameter to merge S[0] with its left neighbor */
			set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
			return CARRY_ON;
		}

	/* check whether we can merge S[0] with right neighbor. */
	if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
		set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
		return CARRY_ON;
	}

	/* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
	if (is_leaf_removable(tb))
		return CARRY_ON;

	/* Balancing is not required. */
	tb->s0num = vn->vn_nr_item;
	set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
	return NO_BALANCING_NEEDED;
}
1813 
1814 /* Check whether current node S[h] is balanced when Decreasing its size by
1815  * Deleting or Cutting.
1816  * Calculate parameters for balancing for current level h.
1817  * Parameters:
1818  *	tb	tree_balance structure;
1819  *	h	current level of the node;
1820  *	inum	item number in S[h];
1821  *	mode	d - delete, c - cut.
1822  * Returns:	1 - schedule occurred;
1823  *	        0 - balancing for higher levels needed;
1824  *	       -1 - no balancing for higher levels needed;
1825  *	       -2 - no disk space.
1826  */
1827 static int dc_check_balance(struct tree_balance *tb, int h)
1828 {
1829 	RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
1830 	       "vs-8250: S is not initialized");
1831 
1832 	if (h)
1833 		return dc_check_balance_internal(tb, h);
1834 	else
1835 		return dc_check_balance_leaf(tb, h);
1836 }
1837 
1838 /* Check whether current node S[h] is balanced.
1839  * Calculate parameters for balancing for current level h.
1840  * Parameters:
1841  *
1842  *	tb	tree_balance structure:
1843  *
1844  *              tb is a large structure that must be read about in the header file
1845  *              at the same time as this procedure if the reader is to successfully
1846  *              understand this procedure
1847  *
1848  *	h	current level of the node;
1849  *	inum	item number in S[h];
1850  *	mode	i - insert, p - paste, d - delete, c - cut.
1851  * Returns:	1 - schedule occurred;
1852  *	        0 - balancing for higher levels needed;
1853  *	       -1 - no balancing for higher levels needed;
1854  *	       -2 - no disk space.
1855  */
1856 static int check_balance(int mode,
1857 			 struct tree_balance *tb,
1858 			 int h,
1859 			 int inum,
1860 			 int pos_in_item,
1861 			 struct item_head *ins_ih, const void *data)
1862 {
1863 	struct virtual_node *vn;
1864 
1865 	vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1866 	vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1867 	vn->vn_mode = mode;
1868 	vn->vn_affected_item_num = inum;
1869 	vn->vn_pos_in_item = pos_in_item;
1870 	vn->vn_ins_ih = ins_ih;
1871 	vn->vn_data = data;
1872 
1873 	RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1874 	       "vs-8255: ins_ih can not be 0 in insert mode");
1875 
1876 	if (tb->insert_size[h] > 0)
1877 		/* Calculate balance parameters when size of node is increasing. */
1878 		return ip_check_balance(tb, h);
1879 
1880 	/* Calculate balance parameters when  size of node is decreasing. */
1881 	return dc_check_balance(tb, h);
1882 }
1883 
1884 /* Check whether parent at the path is the really parent of the current node.*/
1885 static int get_direct_parent(struct tree_balance *p_s_tb, int n_h)
1886 {
1887 	struct buffer_head *p_s_bh;
1888 	struct treepath *p_s_path = p_s_tb->tb_path;
1889 	int n_position,
1890 	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1891 
1892 	/* We are in the root or in the new root. */
1893 	if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1894 
1895 		RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1896 		       "PAP-8260: invalid offset in the path");
1897 
1898 		if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->
1899 		    b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
1900 			/* Root is not changed. */
1901 			PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
1902 			PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
1903 			return CARRY_ON;
1904 		}
1905 		return REPEAT_SEARCH;	/* Root is changed and we must recalculate the path. */
1906 	}
1907 
1908 	if (!B_IS_IN_TREE
1909 	    (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)))
1910 		return REPEAT_SEARCH;	/* Parent in the path is not in the tree. */
1911 
1912 	if ((n_position =
1913 	     PATH_OFFSET_POSITION(p_s_path,
1914 				  n_path_offset - 1)) > B_NR_ITEMS(p_s_bh))
1915 		return REPEAT_SEARCH;
1916 
1917 	if (B_N_CHILD_NUM(p_s_bh, n_position) !=
1918 	    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr)
1919 		/* Parent in the path is not parent of the current node in the tree. */
1920 		return REPEAT_SEARCH;
1921 
1922 	if (buffer_locked(p_s_bh)) {
1923 		__wait_on_buffer(p_s_bh);
1924 		if (FILESYSTEM_CHANGED_TB(p_s_tb))
1925 			return REPEAT_SEARCH;
1926 	}
1927 
1928 	return CARRY_ON;	/* Parent in the path is unlocked and really parent of the current node.  */
1929 }
1930 
1931 /* Using lnum[n_h] and rnum[n_h] we should determine what neighbors
1932  * of S[n_h] we
1933  * need in order to balance S[n_h], and get them if necessary.
1934  * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
1935  *	        CARRY_ON - schedule didn't occur while the function worked;
1936  */
static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
{
	int n_child_position,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1);
	unsigned long n_son_number;
	struct super_block *p_s_sb = p_s_tb->tb_sb;
	struct buffer_head *p_s_bh;

	PROC_INFO_INC(p_s_sb, get_neighbors[n_h]);

	if (p_s_tb->lnum[n_h]) {
		/* We need left neighbor to balance S[n_h]. */
		PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]);
		p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);

		RFALSE(p_s_bh == p_s_tb->FL[n_h] &&
		       !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset),
		       "PAP-8270: invalid position in the parent");

		/* If S and L share the parent FL, the left neighbor is the
		   child just before lkey; otherwise it is the rightmost
		   child of FL. */
		n_child_position =
		    (p_s_bh ==
		     p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->
								       FL[n_h]);
		n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position);
		p_s_bh = sb_bread(p_s_sb, n_son_number);
		if (!p_s_bh)
			return IO_ERROR;
		/* sb_bread may have slept; re-check the generation and drop
		   the buffer if the tree changed under us. */
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			decrement_bcount(p_s_bh);
			PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
			return REPEAT_SEARCH;
		}

		RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
		       n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) ||
		       B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) !=
		       p_s_bh->b_blocknr, "PAP-8275: invalid parent");
		RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child");
		RFALSE(!n_h &&
		       B_FREE_SPACE(p_s_bh) !=
		       MAX_CHILD_SIZE(p_s_bh) -
		       dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)),
		       "PAP-8290: invalid child size of left neighbor");

		/* Release any previously cached left neighbor and install
		   the fresh one. */
		decrement_bcount(p_s_tb->L[n_h]);
		p_s_tb->L[n_h] = p_s_bh;
	}

	if (p_s_tb->rnum[n_h]) {	/* We need right neighbor to balance S[n_path_offset]. */
		PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]);
		p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);

		RFALSE(p_s_bh == p_s_tb->FR[n_h] &&
		       PATH_OFFSET_POSITION(p_s_tb->tb_path,
					    n_path_offset) >=
		       B_NR_ITEMS(p_s_bh),
		       "PAP-8295: invalid position in the parent");

		/* If S and R share the parent FR, the right neighbor is the
		   child just after rkey; otherwise it is FR's leftmost child. */
		n_child_position =
		    (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0;
		n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position);
		p_s_bh = sb_bread(p_s_sb, n_son_number);
		if (!p_s_bh)
			return IO_ERROR;
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			decrement_bcount(p_s_bh);
			PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
			return REPEAT_SEARCH;
		}
		decrement_bcount(p_s_tb->R[n_h]);
		p_s_tb->R[n_h] = p_s_bh;

		RFALSE(!n_h
		       && B_FREE_SPACE(p_s_bh) !=
		       MAX_CHILD_SIZE(p_s_bh) -
		       dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)),
		       "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
		       B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh),
		       dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)));

	}
	return CARRY_ON;
}
2020 
2021 static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2022 {
2023 	int max_num_of_items;
2024 	int max_num_of_entries;
2025 	unsigned long blocksize = sb->s_blocksize;
2026 
2027 #define MIN_NAME_LEN 1
2028 
2029 	max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2030 	max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2031 	    (DEH_SIZE + MIN_NAME_LEN);
2032 
2033 	return sizeof(struct virtual_node) +
2034 	    max(max_num_of_items * sizeof(struct virtual_item),
2035 		sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
2036 		(max_num_of_entries - 1) * sizeof(__u16));
2037 }
2038 
/* Maybe we should fail the balancing we are about to perform when
   kmalloc fails several times.  For now it will loop until kmalloc
   gets the required memory. */
2042 static int get_mem_for_virtual_node(struct tree_balance *tb)
2043 {
2044 	int check_fs = 0;
2045 	int size;
2046 	char *buf;
2047 
2048 	size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2049 
2050 	if (size > tb->vn_buf_size) {
2051 		/* we have to allocate more memory for virtual node */
2052 		if (tb->vn_buf) {
2053 			/* free memory allocated before */
2054 			kfree(tb->vn_buf);
2055 			/* this is not needed if kfree is atomic */
2056 			check_fs = 1;
2057 		}
2058 
2059 		/* virtual node requires now more memory */
2060 		tb->vn_buf_size = size;
2061 
2062 		/* get memory for virtual item */
2063 		buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
2064 		if (!buf) {
2065 			/* getting memory with GFP_KERNEL priority may involve
2066 			   balancing now (due to indirect_to_direct conversion on
2067 			   dcache shrinking). So, release path and collected
2068 			   resources here */
2069 			free_buffers_in_tb(tb);
2070 			buf = kmalloc(size, GFP_NOFS);
2071 			if (!buf) {
2072 				tb->vn_buf_size = 0;
2073 			}
2074 			tb->vn_buf = buf;
2075 			schedule();
2076 			return REPEAT_SEARCH;
2077 		}
2078 
2079 		tb->vn_buf = buf;
2080 	}
2081 
2082 	if (check_fs && FILESYSTEM_CHANGED_TB(tb))
2083 		return REPEAT_SEARCH;
2084 
2085 	return CARRY_ON;
2086 }
2087 
#ifdef CONFIG_REISERFS_CHECK
/* Debug builds only: panic if a buffer taking part in balancing is in
   any obviously inconsistent state.  A NULL buffer is silently accepted. */
static void tb_buffer_sanity_check(struct super_block *p_s_sb,
				   struct buffer_head *p_s_bh,
				   const char *descr, int level)
{
	if (!p_s_bh)
		return;

	if (atomic_read(&(p_s_bh->b_count)) <= 0)
		reiserfs_panic(p_s_sb,
			       "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n",
			       descr, level, p_s_bh);

	if (!buffer_uptodate(p_s_bh))
		reiserfs_panic(p_s_sb,
			       "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n",
			       descr, level, p_s_bh);

	if (!B_IS_IN_TREE(p_s_bh))
		reiserfs_panic(p_s_sb,
			       "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n",
			       descr, level, p_s_bh);

	if (p_s_bh->b_bdev != p_s_sb->s_bdev)
		reiserfs_panic(p_s_sb,
			       "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n",
			       descr, level, p_s_bh);

	if (p_s_bh->b_size != p_s_sb->s_blocksize)
		reiserfs_panic(p_s_sb,
			       "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n",
			       descr, level, p_s_bh);

	if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb))
		reiserfs_panic(p_s_sb,
			       "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n",
			       descr, level, p_s_bh);
}
#else
/* No-op stub for non-debug builds. */
static void tb_buffer_sanity_check(struct super_block *p_s_sb,
				   struct buffer_head *p_s_bh,
				   const char *descr, int level)
{
}
#endif
2139 
/* Try to make bh safe for balancing via the journal.  Judging by the
   callers here, a zero return apparently means the buffer is still busy
   and must be waited on (see wait_tb_buffers_until_unlocked) — confirm
   against reiserfs_prepare_for_journal. */
static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
{
	return reiserfs_prepare_for_journal(s, bh, 0);
}
2144 
2145 static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2146 {
2147 	struct buffer_head *locked;
2148 #ifdef CONFIG_REISERFS_CHECK
2149 	int repeat_counter = 0;
2150 #endif
2151 	int i;
2152 
2153 	do {
2154 
2155 		locked = NULL;
2156 
2157 		for (i = p_s_tb->tb_path->path_length;
2158 		     !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2159 			if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2160 				/* if I understand correctly, we can only be sure the last buffer
2161 				 ** in the path is in the tree --clm
2162 				 */
2163 #ifdef CONFIG_REISERFS_CHECK
2164 				if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
2165 				    PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2166 					tb_buffer_sanity_check(p_s_tb->tb_sb,
2167 							       PATH_OFFSET_PBUFFER
2168 							       (p_s_tb->tb_path,
2169 								i), "S",
2170 							       p_s_tb->tb_path->
2171 							       path_length - i);
2172 				}
2173 #endif
2174 				if (!clear_all_dirty_bits(p_s_tb->tb_sb,
2175 							  PATH_OFFSET_PBUFFER
2176 							  (p_s_tb->tb_path,
2177 							   i))) {
2178 					locked =
2179 					    PATH_OFFSET_PBUFFER(p_s_tb->tb_path,
2180 								i);
2181 				}
2182 			}
2183 		}
2184 
2185 		for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i];
2186 		     i++) {
2187 
2188 			if (p_s_tb->lnum[i]) {
2189 
2190 				if (p_s_tb->L[i]) {
2191 					tb_buffer_sanity_check(p_s_tb->tb_sb,
2192 							       p_s_tb->L[i],
2193 							       "L", i);
2194 					if (!clear_all_dirty_bits
2195 					    (p_s_tb->tb_sb, p_s_tb->L[i]))
2196 						locked = p_s_tb->L[i];
2197 				}
2198 
2199 				if (!locked && p_s_tb->FL[i]) {
2200 					tb_buffer_sanity_check(p_s_tb->tb_sb,
2201 							       p_s_tb->FL[i],
2202 							       "FL", i);
2203 					if (!clear_all_dirty_bits
2204 					    (p_s_tb->tb_sb, p_s_tb->FL[i]))
2205 						locked = p_s_tb->FL[i];
2206 				}
2207 
2208 				if (!locked && p_s_tb->CFL[i]) {
2209 					tb_buffer_sanity_check(p_s_tb->tb_sb,
2210 							       p_s_tb->CFL[i],
2211 							       "CFL", i);
2212 					if (!clear_all_dirty_bits
2213 					    (p_s_tb->tb_sb, p_s_tb->CFL[i]))
2214 						locked = p_s_tb->CFL[i];
2215 				}
2216 
2217 			}
2218 
2219 			if (!locked && (p_s_tb->rnum[i])) {
2220 
2221 				if (p_s_tb->R[i]) {
2222 					tb_buffer_sanity_check(p_s_tb->tb_sb,
2223 							       p_s_tb->R[i],
2224 							       "R", i);
2225 					if (!clear_all_dirty_bits
2226 					    (p_s_tb->tb_sb, p_s_tb->R[i]))
2227 						locked = p_s_tb->R[i];
2228 				}
2229 
2230 				if (!locked && p_s_tb->FR[i]) {
2231 					tb_buffer_sanity_check(p_s_tb->tb_sb,
2232 							       p_s_tb->FR[i],
2233 							       "FR", i);
2234 					if (!clear_all_dirty_bits
2235 					    (p_s_tb->tb_sb, p_s_tb->FR[i]))
2236 						locked = p_s_tb->FR[i];
2237 				}
2238 
2239 				if (!locked && p_s_tb->CFR[i]) {
2240 					tb_buffer_sanity_check(p_s_tb->tb_sb,
2241 							       p_s_tb->CFR[i],
2242 							       "CFR", i);
2243 					if (!clear_all_dirty_bits
2244 					    (p_s_tb->tb_sb, p_s_tb->CFR[i]))
2245 						locked = p_s_tb->CFR[i];
2246 				}
2247 			}
2248 		}
2249 		/* as far as I can tell, this is not required.  The FEB list seems
2250 		 ** to be full of newly allocated nodes, which will never be locked,
2251 		 ** dirty, or anything else.
2252 		 ** To be safe, I'm putting in the checks and waits in.  For the moment,
2253 		 ** they are needed to keep the code in journal.c from complaining
2254 		 ** about the buffer.  That code is inside CONFIG_REISERFS_CHECK as well.
2255 		 ** --clm
2256 		 */
2257 		for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2258 			if (p_s_tb->FEB[i]) {
2259 				if (!clear_all_dirty_bits
2260 				    (p_s_tb->tb_sb, p_s_tb->FEB[i]))
2261 					locked = p_s_tb->FEB[i];
2262 			}
2263 		}
2264 
2265 		if (locked) {
2266 #ifdef CONFIG_REISERFS_CHECK
2267 			repeat_counter++;
2268 			if ((repeat_counter % 10000) == 0) {
2269 				reiserfs_warning(p_s_tb->tb_sb,
2270 						 "wait_tb_buffers_until_released(): too many "
2271 						 "iterations waiting for buffer to unlock "
2272 						 "(%b)", locked);
2273 
2274 				/* Don't loop forever.  Try to recover from possible error. */
2275 
2276 				return (FILESYSTEM_CHANGED_TB(p_s_tb)) ?
2277 				    REPEAT_SEARCH : CARRY_ON;
2278 			}
2279 #endif
2280 			__wait_on_buffer(locked);
2281 			if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2282 				return REPEAT_SEARCH;
2283 			}
2284 		}
2285 
2286 	} while (locked);
2287 
2288 	return CARRY_ON;
2289 }
2290 
2291 /* Prepare for balancing, that is
2292  *	get all necessary parents, and neighbors;
2293  *	analyze what and where should be moved;
2294  *	get sufficient number of new nodes;
2295  * Balancing will start only after all resources will be collected at a time.
2296  *
2297  * When ported to SMP kernels, only at the last moment after all needed nodes
2298  * are collected in cache, will the resources be locked using the usual
2299  * textbook ordered lock acquisition algorithms.  Note that ensuring that
2300  * this code neither write locks what it does not need to write lock nor locks out of order
2301  * will be a pain in the butt that could have been avoided.  Grumble grumble. -Hans
2302  *
2303  * fix is meant in the sense of render unchanging
2304  *
2305  * Latency might be improved by first gathering a list of what buffers are needed
2306  * and then getting as many of them in parallel as possible? -Hans
2307  *
2308  * Parameters:
2309  *	op_mode	i - insert, d - delete, c - cut (truncate), p - paste (append)
2310  *	tb	tree_balance structure;
2311  *	inum	item number in S[h];
2312  *      pos_in_item - comment this if you can
2313  *      ins_ih & ins_sd are used when inserting
2314  * Returns:	1 - schedule occurred while the function worked;
2315  *	        0 - schedule didn't occur while the function worked;
2316  *             -1 - if no_disk_space
2317  */
2318 
int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih,	// item head of item being inserted
	      const void *data	// inserted item or data to be pasted
    )
{
	int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
	int n_pos_in_item;

	/* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
	 ** during wait_tb_buffers_run
	 */
	int wait_tb_buffers_run = 0;
	struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);

	++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes;

	n_pos_in_item = p_s_tb->tb_path->pos_in_item;

	/* Remember the generation so FILESYSTEM_CHANGED_TB can detect any
	   concurrent tree change while we gather resources. */
	p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb);

	/* we prepare and log the super here so it will already be in the
	 ** transaction when do_balance needs to change it.
	 ** This way do_balance won't have to schedule when trying to prepare
	 ** the super for logging
	 */
	reiserfs_prepare_for_journal(p_s_tb->tb_sb,
				     SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1);
	journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
			   SB_BUFFER_WITH_SB(p_s_tb->tb_sb));
	if (FILESYSTEM_CHANGED_TB(p_s_tb))
		return REPEAT_SEARCH;

	/* if it possible in indirect_to_direct conversion */
	if (buffer_locked(p_s_tbS0)) {
		__wait_on_buffer(p_s_tbS0);
		if (FILESYSTEM_CHANGED_TB(p_s_tb))
			return REPEAT_SEARCH;
	}
#ifdef CONFIG_REISERFS_CHECK
	if (cur_tb) {
		print_cur_tb("fix_nodes");
		reiserfs_panic(p_s_tb->tb_sb,
			       "PAP-8305: fix_nodes:  there is pending do_balance");
	}

	if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) {
		reiserfs_panic(p_s_tb->tb_sb,
			       "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
			       "at the beginning of fix_nodes or not in tree (mode %c)",
			       p_s_tbS0, p_s_tbS0, n_op_mode);
	}

	/* Check parameters. */
	switch (n_op_mode) {
	case M_INSERT:
		/* Insert positions range 1..nr_items (item may be appended). */
		if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0))
			reiserfs_panic(p_s_tb->tb_sb,
				       "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
				       n_item_num, B_NR_ITEMS(p_s_tbS0));
		break;
	case M_PASTE:
	case M_DELETE:
	case M_CUT:
		/* These modes operate on an existing item: 0..nr_items-1. */
		if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) {
			print_block(p_s_tbS0, 0, -1, -1);
			reiserfs_panic(p_s_tb->tb_sb,
				       "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n",
				       n_item_num, n_op_mode,
				       p_s_tb->insert_size[0]);
		}
		break;
	default:
		reiserfs_panic(p_s_tb->tb_sb,
			       "PAP-8340: fix_nodes: Incorrect mode of operation");
	}
#endif

	if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH)
		// FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
		return REPEAT_SEARCH;

	/* Starting from the leaf level; for all levels n_h of the tree. */
	/* For each level: verify the cached parent, compute the balance
	   plan, then gather neighbors and empty nodes; any failure or
	   detected change jumps to `repeat` to release everything. */
	for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) {
		if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) {
			goto repeat;
		}

		if ((n_ret_value =
		     check_balance(n_op_mode, p_s_tb, n_h, n_item_num,
				   n_pos_in_item, p_s_ins_ih,
				   data)) != CARRY_ON) {
			if (n_ret_value == NO_BALANCING_NEEDED) {
				/* No balancing for higher levels needed. */
				if ((n_ret_value =
				     get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
					goto repeat;
				}
				if (n_h != MAX_HEIGHT - 1)
					p_s_tb->insert_size[n_h + 1] = 0;
				/* ok, analysis and resource gathering are complete */
				break;
			}
			goto repeat;
		}

		if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
			goto repeat;
		}

		if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) {
			goto repeat;	/* No disk space, or schedule occurred and
					   analysis may be invalid and needs to be redone. */
		}

		/* Decide how much the parent level must grow, i.e. set
		   insert_size[n_h + 1] for the next iteration. */
		if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) {
			/* We have a positive insert size but no nodes exist on this
			   level, this means that we are creating a new root. */

			RFALSE(p_s_tb->blknum[n_h] != 1,
			       "PAP-8350: creating new empty root");

			if (n_h < MAX_HEIGHT - 1)
				p_s_tb->insert_size[n_h + 1] = 0;
		} else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) {
			if (p_s_tb->blknum[n_h] > 1) {
				/* The tree needs to be grown, so this node S[n_h]
				   which is the root node is split into two nodes,
				   and a new node (S[n_h+1]) will be created to
				   become the root node.  */

				RFALSE(n_h == MAX_HEIGHT - 1,
				       "PAP-8355: attempt to create too high of a tree");

				p_s_tb->insert_size[n_h + 1] =
				    (DC_SIZE +
				     KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) +
				    DC_SIZE;
			} else if (n_h < MAX_HEIGHT - 1)
				p_s_tb->insert_size[n_h + 1] = 0;
		} else
			p_s_tb->insert_size[n_h + 1] =
			    (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1);
	}

	/* All resources are gathered; make sure none of the buffers is
	   still held by a committing transaction before handing them to
	   do_balance. */
	if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) {
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			wait_tb_buffers_run = 1;
			n_ret_value = REPEAT_SEARCH;
			goto repeat;
		} else {
			return CARRY_ON;
		}
	} else {
		wait_tb_buffers_run = 1;
		goto repeat;
	}

      repeat:
	// fix_nodes was unable to perform its calculation due to
	// filesystem got changed under us, lack of free disk space or i/o
	// failure. If the first is the case - the search will be
	// repeated. For now - free all resources acquired so far except
	// for the new allocated nodes
	{
		int i;

		/* Release path buffers. */
		if (wait_tb_buffers_run) {
			pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path);
		} else {
			pathrelse(p_s_tb->tb_path);
		}
		/* brelse all resources collected for balancing */
		for (i = 0; i < MAX_HEIGHT; i++) {
			/* When wait_tb_buffers_until_unlocked ran, some
			   buffers were journal-prepared and must have their
			   dirty bits restored before release. */
			if (wait_tb_buffers_run) {
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->L[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->R[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->FL[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->FR[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->
								 CFL[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->
								 CFR[i]);
			}

			brelse(p_s_tb->L[i]);
			p_s_tb->L[i] = NULL;
			brelse(p_s_tb->R[i]);
			p_s_tb->R[i] = NULL;
			brelse(p_s_tb->FL[i]);
			p_s_tb->FL[i] = NULL;
			brelse(p_s_tb->FR[i]);
			p_s_tb->FR[i] = NULL;
			brelse(p_s_tb->CFL[i]);
			p_s_tb->CFL[i] = NULL;
			brelse(p_s_tb->CFR[i]);
			p_s_tb->CFR[i] = NULL;
		}

		if (wait_tb_buffers_run) {
			for (i = 0; i < MAX_FEB_SIZE; i++) {
				if (p_s_tb->FEB[i]) {
					reiserfs_restore_prepared_buffer
					    (p_s_tb->tb_sb, p_s_tb->FEB[i]);
				}
			}
		}
		return n_ret_value;
	}

}
2535 
2536 /* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2537    wanted to make lines shorter */
2538 void unfix_nodes(struct tree_balance *tb)
2539 {
2540 	int i;
2541 
2542 	/* Release path buffers. */
2543 	pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2544 
2545 	/* brelse all resources collected for balancing */
2546 	for (i = 0; i < MAX_HEIGHT; i++) {
2547 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
2548 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
2549 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
2550 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
2551 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
2552 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
2553 
2554 		brelse(tb->L[i]);
2555 		brelse(tb->R[i]);
2556 		brelse(tb->FL[i]);
2557 		brelse(tb->FR[i]);
2558 		brelse(tb->CFL[i]);
2559 		brelse(tb->CFR[i]);
2560 	}
2561 
2562 	/* deal with list of allocated (used and unused) nodes */
2563 	for (i = 0; i < MAX_FEB_SIZE; i++) {
2564 		if (tb->FEB[i]) {
2565 			b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2566 			/* de-allocated block which was not used by balancing and
2567 			   bforget about buffer for it */
2568 			brelse(tb->FEB[i]);
2569 			reiserfs_free_block(tb->transaction_handle, NULL,
2570 					    blocknr, 0);
2571 		}
2572 		if (tb->used[i]) {
2573 			/* release used as new nodes including a new root */
2574 			brelse(tb->used[i]);
2575 		}
2576 	}
2577 
2578 	kfree(tb->vn_buf);
2579 
2580 }
2581