xref: /linux/fs/jfs/jfs_xtree.c (revision b4ada0618eed0fbd1b1630f73deb048c592b06a1)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *   Copyright (C) International Business Machines Corp., 2000-2005
4  */
5 /*
6  *	jfs_xtree.c: extent allocation descriptor B+-tree manager
7  */
8 
9 #include <linux/fs.h>
10 #include <linux/module.h>
11 #include <linux/quotaops.h>
12 #include <linux/seq_file.h>
13 #include "jfs_incore.h"
14 #include "jfs_filsys.h"
15 #include "jfs_metapage.h"
16 #include "jfs_dmap.h"
17 #include "jfs_dinode.h"
18 #include "jfs_superblock.h"
19 #include "jfs_debug.h"
20 
21 /*
22  * xtree local flag
23  */
24 #define XT_INSERT	0x00000001
25 
26 /*
27  *	xtree key/entry comparison: extent offset
28  *
29  * return:
30  *	-1: k < start of extent
31  *	 0: start_of_extent <= k <= end_of_extent
32  *	 1: k > end_of_extent
33  */
/* N.B. multi-evaluation macro: K and X must be side-effect free.
 * Wrapped in do { } while (0) so a call followed by ';' remains a
 * single statement inside an unbraced if/else body.
 */
#define XT_CMP(CMP, K, X, OFFSET64)\
do {\
	OFFSET64 = offsetXAD(X);\
	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
		((K) < OFFSET64) ? -1 : 0;\
} while (0)
40 
/* write a xad entry (multi-evaluation macro: arguments must be
 * side-effect free); do { } while (0) keeps the expansion a single
 * statement so it is safe in unbraced if/else bodies.
 */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
do {\
	(XAD)->flag = (FLAG);\
	XADoffset((XAD), (OFF));\
	XADlength((XAD), (LEN));\
	XADaddress((XAD), (ADDR));\
} while (0)
49 
/* map a pinned metapage MP of inode IP to its xtree page (root lives
 * in the in-inode i_xtroot area)
 */
#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)

/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)

/* unpack (BN, MP, P, INDEX) from the btframe LEAF saved by xtSearch() */
#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
/* xtree entry parameter descriptor: describes the entry to insert and
 * the (pinned) page it goes into; filled by xtInsert() and consumed by
 * the split routines
 */
struct xtsplit {
	struct metapage *mp;	/* pinned page to insert into / split */
	s16 index;		/* index at which the new entry goes */
	u8 flag;		/* xad flag of the new entry */
	s64 off;		/* extent offset of the new entry */
	s64 addr;		/* extent address of the new entry */
	int len;		/* extent length of the new entry */
	struct pxdlist *pxdlist;	/* blocks pre-allocated for split pages
					 * (NULL: xtSplitUp() allocates them) */
};
67 
68 
/*
 *	statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint search;		/* # of xtSearch() invocations */
	uint fastSearch;	/* # resolved by the sequential heuristic */
	uint split;		/* # of page splits (xtSplitPage()) */
} xtStat;
#endif
79 
80 
81 /*
82  * forward references
83  */
84 static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
85 		    struct btstack * btstack, int flag);
86 
87 static int xtSplitUp(tid_t tid,
88 		     struct inode *ip,
89 		     struct xtsplit * split, struct btstack * btstack);
90 
91 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
92 		       struct metapage ** rmpp, s64 * rbnp);
93 
94 static int xtSplitRoot(tid_t tid, struct inode *ip,
95 		       struct xtsplit * split, struct metapage ** rmpp);
96 
97 /*
98  *	xt_getpage()
99  *
100  * function:	get the page buffer for a specified block address.
101  *
102  * parameters:
103  *	ip      - pointer to the inode
104  *	bn      - block number (s64) of the xtree page to be retrieved;
105  *	mp      - pointer to a metapage pointer where the page buffer is returned;
106  *
107  * returns:
 *      A pointer to the xtree page (xtpage_t) on success; otherwise an
 *      ERR_PTR-encoded errno (-EIO if the page fails validation).
109  */
110 
111 static inline xtpage_t *xt_getpage(struct inode *ip, s64 bn, struct metapage **mp)
112 {
113 	xtpage_t *p;
114 	int rc;
115 
116 	BT_GETPAGE(ip, bn, *mp, xtpage_t, PSIZE, p, rc, i_xtroot);
117 
118 	if (rc)
119 		return ERR_PTR(rc);
120 	if ((le16_to_cpu(p->header.nextindex) < XTENTRYSTART) ||
121 		(le16_to_cpu(p->header.nextindex) >
122 			le16_to_cpu(p->header.maxentry)) ||
123 		(le16_to_cpu(p->header.maxentry) >
124 			((bn == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) {
125 		jfs_error(ip->i_sb, "xt_getpage: xtree page corrupt\n");
126 		BT_PUTPAGE(*mp);
127 		*mp = NULL;
128 		return ERR_PTR(-EIO);
129 	}
130 	return p;
131 }
132 
/*
 *	xtLookup()
 *
 * function: map a single page into a physical extent;
 *
 * parameters:
 *	ip	- file object;
 *	lstart	- logical start block (page-aligned, in fs blocks);
 *	llen	- number of blocks requested;
 *	pflag	- returned xad flag of the covering extent (untouched
 *		  when lstart falls in a hole);
 *	paddr	- returned physical start address (0 for a hole);
 *	plen	- returned number of contiguously mapped blocks
 *		  (<= llen);
 *	no_check - nonzero to skip the beyond-eof short cut;
 *
 * return: 0 on success (including a hole), or the error from
 *	xtSearch();
 */
int xtLookup(struct inode *ip, s64 lstart,
	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	xad_t *xad;
	s64 next, size, xoff, xend;
	int xlen;
	s64 xaddr;

	*paddr = 0;
	*plen = llen;

	if (!no_check) {
		/* is lookup offset beyond eof ? */
		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
		    JFS_SBI(ip->i_sb)->l2bsize;
		if (lstart >= size)
			return 0;
	}

	/*
	 * search for the xad entry covering the logical extent
	 */
//search:
	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
		jfs_err("xtLookup: xtSearch returned %d", rc);
		return rc;
	}

	/*
	 *	compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad found covering start of logical extent ?
	 * lstart is a page start address,
	 * i.e., lstart cannot start in a hole;
	 */
	if (cmp) {
		/* search miss: lstart lies in a hole; leave *paddr == 0
		 * and clamp *plen to the start of the next extent, if
		 * xtSearch() reported one
		 */
		if (next)
			*plen = min(next - lstart, llen);
		goto out;
	}

	/*
	 * lxd covered by xad
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xoff + xlen;
	xaddr = addressXAD(xad);

	/* initialize new pxd */
	*pflag = xad->flag;
	*paddr = xaddr + (lstart - xoff);
	/* a page must be fully covered by an xad */
	*plen = min(xend - lstart, llen);

      out:
	/* unpin the leaf page pinned by xtSearch() */
	XT_PUTPAGE(mp);

	return rc;
}
212 
/*
 *	xtSearch()
 *
 * function:	search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss;
 *		  may be NULL if the caller does not need it;
 *	cmpp	- comparison result: 0 on hit, nonzero on miss;
 *	btstack - traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 */
static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 *	search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		p = xt_getpage(ip, bn, &mp);
		if (IS_ERR(p))
			return PTR_ERR(p);

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 */
			/* N.B. this label sits inside the fast-search
			 * branch and is only reachable via the gotos
			 * above; the binary-search path has its own
			 * epilogues below.
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 *	search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 *	search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		if (BT_STACK_FULL(btstack)) {
			jfs_error(ip->i_sb, "stack overrun!\n");
			XT_PUTPAGE(mp);
			return -EIO;
		}
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
507 
/*
 *	xtInsert()
 *
 * function:
 *	insert an xad entry for the extent [xoff, xoff + xlen) into the
 *	xtree of the file, allocating the data extent first if the
 *	caller has not already done so;
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return:
 *	0 on success (*xaddrp set); -EEXIST if the range overlaps an
 *	existing entry; otherwise the error from xtSearch(), quota or
 *	block allocation, or xtSplitUp().
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 *	search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		if ((rc = dquot_alloc_block(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			/* undo the quota charge made just above */
			dquot_free_block(ip, xlen);
			goto out;
		}
	}

	/*
	 *	insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *	if the leaf page is full, split the page and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				dquot_free_block(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 *	insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		/* extend the logged range to cover the shifted tail */
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
662 
663 
664 /*
665  *	xtSplitUp()
666  *
667  * function:
668  *	split full pages as propagating insertion up the tree
669  *
670  * parameter:
671  *	tid	- transaction id;
672  *	ip	- file object;
673  *	split	- entry parameter descriptor;
674  *	btstack - traverse stack from xtSearch()
675  *
676  * return:
677  */
678 static int
679 xtSplitUp(tid_t tid,
680 	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
681 {
682 	int rc = 0;
683 	struct metapage *smp;
684 	xtpage_t *sp;		/* split page */
685 	struct metapage *rmp;
686 	s64 rbn;		/* new right page block number */
687 	struct metapage *rcmp;
688 	xtpage_t *rcp;		/* right child page */
689 	s64 rcbn;		/* right child page block number */
690 	int skip;		/* index of entry of insertion */
691 	int nextindex;		/* next available entry index of p */
692 	struct btframe *parent;	/* parent page entry on traverse stack */
693 	xad_t *xad;
694 	s64 xaddr;
695 	int xlen;
696 	int nsplit;		/* number of pages split */
697 	struct pxdlist pxdlist;
698 	pxd_t *pxd;
699 	struct tlock *tlck;
700 	struct xtlock *xtlck;
701 
702 	smp = split->mp;
703 	sp = XT_PAGE(ip, smp);
704 
705 	/* is inode xtree root extension/inline EA area free ? */
706 	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
707 	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
708 	    (JFS_IP(ip)->mode2 & INLINEEA)) {
709 		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
710 		JFS_IP(ip)->mode2 &= ~INLINEEA;
711 
712 		BT_MARK_DIRTY(smp, ip);
713 		/*
714 		 * acquire a transaction lock on the leaf page;
715 		 *
716 		 * action: xad insertion/extension;
717 		 */
718 
719 		/* if insert into middle, shift right remaining entries. */
720 		skip = split->index;
721 		nextindex = le16_to_cpu(sp->header.nextindex);
722 		if (skip < nextindex)
723 			memmove(&sp->xad[skip + 1], &sp->xad[skip],
724 				(nextindex - skip) * sizeof(xad_t));
725 
726 		/* insert the new entry: mark the entry NEW */
727 		xad = &sp->xad[skip];
728 		XT_PUTENTRY(xad, split->flag, split->off, split->len,
729 			    split->addr);
730 
731 		/* advance next available entry index */
732 		le16_add_cpu(&sp->header.nextindex, 1);
733 
734 		/* Don't log it if there are no links to the file */
735 		if (!test_cflag(COMMIT_Nolink, ip)) {
736 			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
737 			xtlck = (struct xtlock *) & tlck->lock;
738 			xtlck->lwm.offset = (xtlck->lwm.offset) ?
739 			    min(skip, (int)xtlck->lwm.offset) : skip;
740 			xtlck->lwm.length =
741 			    le16_to_cpu(sp->header.nextindex) -
742 			    xtlck->lwm.offset;
743 		}
744 
745 		return 0;
746 	}
747 
748 	/*
749 	 * allocate new index blocks to cover index page split(s)
750 	 *
751 	 * allocation hint: ?
752 	 */
753 	if (split->pxdlist == NULL) {
754 		nsplit = btstack->nsplit;
755 		split->pxdlist = &pxdlist;
756 		pxdlist.maxnpxd = pxdlist.npxd = 0;
757 		pxd = &pxdlist.pxd[0];
758 		xlen = JFS_SBI(ip->i_sb)->nbperpage;
759 		for (; nsplit > 0; nsplit--, pxd++) {
760 			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
761 			    == 0) {
762 				PXDaddress(pxd, xaddr);
763 				PXDlength(pxd, xlen);
764 
765 				pxdlist.maxnpxd++;
766 
767 				continue;
768 			}
769 
770 			/* undo allocation */
771 
772 			XT_PUTPAGE(smp);
773 			return rc;
774 		}
775 	}
776 
777 	/*
778 	 * Split leaf page <sp> into <sp> and a new right page <rp>.
779 	 *
780 	 * The split routines insert the new entry into the leaf page,
781 	 * and acquire txLock as appropriate.
782 	 * return <rp> pinned and its block number <rpbn>.
783 	 */
784 	rc = (sp->header.flag & BT_ROOT) ?
785 	    xtSplitRoot(tid, ip, split, &rmp) :
786 	    xtSplitPage(tid, ip, split, &rmp, &rbn);
787 
788 	XT_PUTPAGE(smp);
789 
790 	if (rc)
791 		return -EIO;
792 	/*
793 	 * propagate up the router entry for the leaf page just split
794 	 *
795 	 * insert a router entry for the new page into the parent page,
796 	 * propagate the insert/split up the tree by walking back the stack
797 	 * of (bn of parent page, index of child page entry in parent page)
798 	 * that were traversed during the search for the page that split.
799 	 *
800 	 * the propagation of insert/split up the tree stops if the root
801 	 * splits or the page inserted into doesn't have to split to hold
802 	 * the new entry.
803 	 *
804 	 * the parent entry for the split page remains the same, and
805 	 * a new entry is inserted at its right with the first key and
806 	 * block number of the new right page.
807 	 *
808 	 * There are a maximum of 3 pages pinned at any time:
809 	 * right child, left parent and right parent (when the parent splits)
810 	 * to keep the child page pinned while working on the parent.
811 	 * make sure that all pins are released at exit.
812 	 */
813 	while ((parent = BT_POP(btstack)) != NULL) {
814 		/* parent page specified by stack frame <parent> */
815 
816 		/* keep current child pages <rcp> pinned */
817 		rcmp = rmp;
818 		rcbn = rbn;
819 		rcp = XT_PAGE(ip, rcmp);
820 
821 		/*
822 		 * insert router entry in parent for new right child page <rp>
823 		 */
824 		/* get/pin the parent page <sp> */
825 		sp = xt_getpage(ip, parent->bn, &smp);
826 		if (IS_ERR(sp)) {
827 			XT_PUTPAGE(rcmp);
828 			return PTR_ERR(sp);
829 		}
830 
831 		/*
832 		 * The new key entry goes ONE AFTER the index of parent entry,
833 		 * because the split was to the right.
834 		 */
835 		skip = parent->index + 1;
836 
837 		/*
838 		 * split or shift right remaining entries of the parent page
839 		 */
840 		nextindex = le16_to_cpu(sp->header.nextindex);
841 		/*
842 		 * parent page is full - split the parent page
843 		 */
844 		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
845 			/* init for parent page split */
846 			split->mp = smp;
847 			split->index = skip;	/* index at insert */
848 			split->flag = XAD_NEW;
849 			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
850 			split->len = JFS_SBI(ip->i_sb)->nbperpage;
851 			split->addr = rcbn;
852 
853 			/* unpin previous right child page */
854 			XT_PUTPAGE(rcmp);
855 
856 			/* The split routines insert the new entry,
857 			 * and acquire txLock as appropriate.
858 			 * return <rp> pinned and its block number <rpbn>.
859 			 */
860 			rc = (sp->header.flag & BT_ROOT) ?
861 			    xtSplitRoot(tid, ip, split, &rmp) :
862 			    xtSplitPage(tid, ip, split, &rmp, &rbn);
863 			if (rc) {
864 				XT_PUTPAGE(smp);
865 				return rc;
866 			}
867 
868 			XT_PUTPAGE(smp);
869 			/* keep new child page <rp> pinned */
870 		}
871 		/*
872 		 * parent page is not full - insert in parent page
873 		 */
874 		else {
875 			/*
876 			 * insert router entry in parent for the right child
877 			 * page from the first entry of the right child page:
878 			 */
879 			/*
880 			 * acquire a transaction lock on the parent page;
881 			 *
882 			 * action: router xad insertion;
883 			 */
884 			BT_MARK_DIRTY(smp, ip);
885 
886 			/*
887 			 * if insert into middle, shift right remaining entries
888 			 */
889 			if (skip < nextindex)
890 				memmove(&sp->xad[skip + 1], &sp->xad[skip],
891 					(nextindex -
892 					 skip) << L2XTSLOTSIZE);
893 
894 			/* insert the router entry */
895 			xad = &sp->xad[skip];
896 			XT_PUTENTRY(xad, XAD_NEW,
897 				    offsetXAD(&rcp->xad[XTENTRYSTART]),
898 				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
899 
900 			/* advance next available entry index. */
901 			le16_add_cpu(&sp->header.nextindex, 1);
902 
903 			/* Don't log it if there are no links to the file */
904 			if (!test_cflag(COMMIT_Nolink, ip)) {
905 				tlck = txLock(tid, ip, smp,
906 					      tlckXTREE | tlckGROW);
907 				xtlck = (struct xtlock *) & tlck->lock;
908 				xtlck->lwm.offset = (xtlck->lwm.offset) ?
909 				    min(skip, (int)xtlck->lwm.offset) : skip;
910 				xtlck->lwm.length =
911 				    le16_to_cpu(sp->header.nextindex) -
912 				    xtlck->lwm.offset;
913 			}
914 
915 			/* unpin parent page */
916 			XT_PUTPAGE(smp);
917 
918 			/* exit propagate up */
919 			break;
920 		}
921 	}
922 
923 	/* unpin current right page */
924 	XT_PUTPAGE(rmp);
925 
926 	return 0;
927 }
928 
929 
930 /*
931  *	xtSplitPage()
932  *
933  * function:
934  *	split a full non-root page into
935  *	original/split/left page and new right page
936  *	i.e., the original/split page remains as left page.
937  *
938  * parameter:
939  *	int		tid,
940  *	struct inode	*ip,
941  *	struct xtsplit	*split,
942  *	struct metapage	**rmpp,
943  *	u64		*rbnp,
944  *
945  * return:
946  *	Pointer to page in which to insert or NULL on error.
947  */
948 static int
949 xtSplitPage(tid_t tid, struct inode *ip,
950 	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
951 {
952 	int rc = 0;
953 	struct metapage *smp;
954 	xtpage_t *sp;
955 	struct metapage *rmp;
956 	xtpage_t *rp;		/* new right page allocated */
957 	s64 rbn;		/* new right page block number */
958 	struct metapage *mp;
959 	xtpage_t *p;
960 	s64 nextbn;
961 	int skip, maxentry, middle, righthalf, n;
962 	xad_t *xad;
963 	struct pxdlist *pxdlist;
964 	pxd_t *pxd;
965 	struct tlock *tlck;
966 	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
967 	int quota_allocation = 0;
968 
969 	smp = split->mp;
970 	sp = XT_PAGE(ip, smp);
971 
972 	INCREMENT(xtStat.split);
973 
974 	pxdlist = split->pxdlist;
975 	pxd = &pxdlist->pxd[pxdlist->npxd];
976 	pxdlist->npxd++;
977 	rbn = addressPXD(pxd);
978 
979 	/* Allocate blocks to quota. */
980 	rc = dquot_alloc_block(ip, lengthPXD(pxd));
981 	if (rc)
982 		goto clean_up;
983 
984 	quota_allocation += lengthPXD(pxd);
985 
986 	/*
987 	 * allocate the new right page for the split
988 	 */
989 	rmp = get_metapage(ip, rbn, PSIZE, 1);
990 	if (rmp == NULL) {
991 		rc = -EIO;
992 		goto clean_up;
993 	}
994 
995 	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
996 
997 	BT_MARK_DIRTY(rmp, ip);
998 	/*
999 	 * action: new page;
1000 	 */
1001 
1002 	rp = (xtpage_t *) rmp->data;
1003 	rp->header.self = *pxd;
1004 	rp->header.flag = sp->header.flag & BT_TYPE;
1005 	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
1006 	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
1007 
1008 	BT_MARK_DIRTY(smp, ip);
1009 	/* Don't log it if there are no links to the file */
1010 	if (!test_cflag(COMMIT_Nolink, ip)) {
1011 		/*
1012 		 * acquire a transaction lock on the new right page;
1013 		 */
1014 		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
1015 		rxtlck = (struct xtlock *) & tlck->lock;
1016 		rxtlck->lwm.offset = XTENTRYSTART;
1017 		/*
1018 		 * acquire a transaction lock on the split page
1019 		 */
1020 		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
1021 		sxtlck = (struct xtlock *) & tlck->lock;
1022 	}
1023 
1024 	/*
1025 	 * initialize/update sibling pointers of <sp> and <rp>
1026 	 */
1027 	nextbn = le64_to_cpu(sp->header.next);
1028 	rp->header.next = cpu_to_le64(nextbn);
1029 	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
1030 	sp->header.next = cpu_to_le64(rbn);
1031 
1032 	skip = split->index;
1033 
1034 	/*
1035 	 *	sequential append at tail (after last entry of last page)
1036 	 *
1037 	 * if splitting the last page on a level because of appending
1038 	 * a entry to it (skip is maxentry), it's likely that the access is
1039 	 * sequential. adding an empty page on the side of the level is less
1040 	 * work and can push the fill factor much higher than normal.
1041 	 * if we're wrong it's no big deal -  we will do the split the right
1042 	 * way next time.
1043 	 * (it may look like it's equally easy to do a similar hack for
1044 	 * reverse sorted data, that is, split the tree left, but it's not.
1045 	 * Be my guest.)
1046 	 */
1047 	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
1048 		/*
1049 		 * acquire a transaction lock on the new/right page;
1050 		 *
1051 		 * action: xad insertion;
1052 		 */
1053 		/* insert entry at the first entry of the new right page */
1054 		xad = &rp->xad[XTENTRYSTART];
1055 		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1056 			    split->addr);
1057 
1058 		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
1059 
1060 		if (!test_cflag(COMMIT_Nolink, ip)) {
1061 			/* rxtlck->lwm.offset = XTENTRYSTART; */
1062 			rxtlck->lwm.length = 1;
1063 		}
1064 
1065 		*rmpp = rmp;
1066 		*rbnp = rbn;
1067 
1068 		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1069 		return 0;
1070 	}
1071 
1072 	/*
1073 	 *	non-sequential insert (at possibly middle page)
1074 	 */
1075 
1076 	/*
1077 	 * update previous pointer of old next/right page of <sp>
1078 	 */
1079 	if (nextbn != 0) {
1080 		p = xt_getpage(ip, nextbn, &mp);
1081 		if (IS_ERR(p)) {
1082 			XT_PUTPAGE(rmp);
1083 			return PTR_ERR(p);
1084 		}
1085 
1086 		BT_MARK_DIRTY(mp, ip);
1087 		/*
1088 		 * acquire a transaction lock on the next page;
1089 		 *
1090 		 * action:sibling pointer update;
1091 		 */
1092 		if (!test_cflag(COMMIT_Nolink, ip))
1093 			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
1094 
1095 		p->header.prev = cpu_to_le64(rbn);
1096 
1097 		/* sibling page may have been updated previously, or
1098 		 * it may be updated later;
1099 		 */
1100 
1101 		XT_PUTPAGE(mp);
1102 	}
1103 
1104 	/*
1105 	 * split the data between the split and new/right pages
1106 	 */
1107 	maxentry = le16_to_cpu(sp->header.maxentry);
1108 	middle = maxentry >> 1;
1109 	righthalf = maxentry - middle;
1110 
1111 	/*
1112 	 * skip index in old split/left page - insert into left page:
1113 	 */
1114 	if (skip <= middle) {
1115 		/* move right half of split page to the new right page */
1116 		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1117 			righthalf << L2XTSLOTSIZE);
1118 
1119 		/* shift right tail of left half to make room for new entry */
1120 		if (skip < middle)
1121 			memmove(&sp->xad[skip + 1], &sp->xad[skip],
1122 				(middle - skip) << L2XTSLOTSIZE);
1123 
1124 		/* insert new entry */
1125 		xad = &sp->xad[skip];
1126 		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1127 			    split->addr);
1128 
1129 		/* update page header */
1130 		sp->header.nextindex = cpu_to_le16(middle + 1);
1131 		if (!test_cflag(COMMIT_Nolink, ip)) {
1132 			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1133 			    min(skip, (int)sxtlck->lwm.offset) : skip;
1134 		}
1135 
1136 		rp->header.nextindex =
1137 		    cpu_to_le16(XTENTRYSTART + righthalf);
1138 	}
1139 	/*
1140 	 * skip index in new right page - insert into right page:
1141 	 */
1142 	else {
1143 		/* move left head of right half to right page */
1144 		n = skip - middle;
1145 		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
1146 			n << L2XTSLOTSIZE);
1147 
1148 		/* insert new entry */
1149 		n += XTENTRYSTART;
1150 		xad = &rp->xad[n];
1151 		XT_PUTENTRY(xad, split->flag, split->off, split->len,
1152 			    split->addr);
1153 
1154 		/* move right tail of right half to right page */
1155 		if (skip < maxentry)
1156 			memmove(&rp->xad[n + 1], &sp->xad[skip],
1157 				(maxentry - skip) << L2XTSLOTSIZE);
1158 
1159 		/* update page header */
1160 		sp->header.nextindex = cpu_to_le16(middle);
1161 		if (!test_cflag(COMMIT_Nolink, ip)) {
1162 			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
1163 			    min(middle, (int)sxtlck->lwm.offset) : middle;
1164 		}
1165 
1166 		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
1167 						   righthalf + 1);
1168 	}
1169 
1170 	if (!test_cflag(COMMIT_Nolink, ip)) {
1171 		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
1172 		    sxtlck->lwm.offset;
1173 
1174 		/* rxtlck->lwm.offset = XTENTRYSTART; */
1175 		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
1176 		    XTENTRYSTART;
1177 	}
1178 
1179 	*rmpp = rmp;
1180 	*rbnp = rbn;
1181 
1182 	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
1183 	return rc;
1184 
1185       clean_up:
1186 
1187 	/* Rollback quota allocation. */
1188 	if (quota_allocation)
1189 		dquot_free_block(ip, quota_allocation);
1190 
1191 	return (rc);
1192 }
1193 
1194 
1195 /*
1196  *	xtSplitRoot()
1197  *
1198  * function:
1199  *	split the full root page into original/root/split page and new
1200  *	right page
1201  *	i.e., root remains fixed in tree anchor (inode) and the root is
1202  *	copied to a single new right child page since root page <<
1203  *	non-root page, and the split root page contains a single entry
1204  *	for the new right child page.
1205  *
1206  * parameter:
1207  *	int		tid,
1208  *	struct inode	*ip,
1209  *	struct xtsplit	*split,
1210  *	struct metapage	**rmpp)
1211  *
 * return:
 *	0 for success; -EIO or a quota allocation error code otherwise
1214  */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;		/* split page: the in-inode xtree root */
	struct metapage *rmp;	/* buffer for the new right child page */
	xtpage_t *rp;		/* new right child page */
	s64 rbn;		/* block number of the new right child page */
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int rc;

	/* the root lives inline in the in-memory inode */
	sp = (xtpage_t *) &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 *	allocate a single (right) child page
	 *
	 * the block extent was pre-allocated by the caller and passed
	 * in via split->pxdlist; consume the next unused pxd from it.
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	rc = dquot_alloc_block(ip, lengthPXD(pxd));
	if (rc) {
		release_metapage(rmp);
		return rc;
	}

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	/* the child keeps the root's leaf/internal type */
	rp = (xtpage_t *) rmp->data;
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers (only child at this level) */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	/* log the entire new page as one write-range */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 *	reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which force the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root: root is now an internal node */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	/* return the pinned right child page to the caller */
	*rmpp = rmp;

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}
1339 
1340 
1341 /*
1342  *	xtExtend()
1343  *
1344  * function: extend in-place;
1345  *
1346  * note: existing extent may or may not have been committed.
1347  * caller is responsible for pager buffer cache update, and
1348  * working block allocation map update;
1349  * update pmap: alloc whole extended extent;
1350  */
int xtExtend(tid_t tid,		/* transaction id */
	     struct inode *ip, s64 xoff,	/* delta extent offset */
	     s32 xlen,		/* delta extent length */
	     int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, len;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	s64 xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;

	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/* there must exist extent to be extended */
	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* cmp == 0 means xoff - 1 lies inside an existing extent */
	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtSearch did not find extent\n");
		return -EIO;
	}

	/* extension must be contiguous: delta must start exactly at
	 * the logical end of the found extent
	 */
	xad = &p->xad[index];
	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "extension is not contiguous\n");
		return -EIO;
	}

	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* extend will overflow extent ? */
	xlen = lengthXAD(xad) + xlen;
	if ((len = xlen - MAXXLEN) <= 0)
		goto extendOld;

	/*
	 *	extent overflow: insert entry for new extent
	 *
	 * the old extent is grown to MAXXLEN and the remainder (len)
	 * becomes a new XAD immediately following it.
	 */
//insertNew:
	xoff = offsetXAD(xad) + MAXXLEN;
	xaddr = addressXAD(xad) + MAXXLEN;
	nextindex = le16_to_cpu(p->header.nextindex);

	/*
	 *	if the leaf page is full, insert the new entry and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = len;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		p = xt_getpage(ip, bn, &mp);
		if (IS_ERR(p))
			return PTR_ERR(p);
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			p = xt_getpage(ip, bn, &mp);
			if (IS_ERR(p))
				return PTR_ERR(p);

			/* re-acquire the tlock: the old one covered mp,
			 * which has been released above
			 */
			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 *	insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old entry; it is grown to the maximum length */
	xad = &p->xad[index];
	xlen = MAXXLEN;

	/*
	 * extend old extent
	 */
      extendOld:
	XADlength(xad, xlen);
	/* a previously-committed extent must be logged as extended */
	if (!(xad->flag & XAD_NEW))
		xad->flag |= XAD_EXTENDED;

	/* widen the logged write-range to cover the modified entries */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
1499 
1500 /*
1501  *	xtUpdate()
1502  *
1503  * function: update XAD;
1504  *
1505  *	update extent for allocated_but_not_recorded or
1506  *	compressed extent;
1507  *
1508  * parameter:
1509  *	nxad	- new XAD;
1510  *		logical extent of the specified XAD must be completely
1511  *		contained by an existing XAD;
1512  */
1513 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1514 {				/* new XAD */
1515 	int rc = 0;
1516 	int cmp;
1517 	struct metapage *mp;	/* meta-page buffer */
1518 	xtpage_t *p;		/* base B+-tree index page */
1519 	s64 bn;
1520 	int index0, index, newindex, nextindex;
1521 	struct btstack btstack;	/* traverse stack */
1522 	struct xtsplit split;	/* split information */
1523 	xad_t *xad, *lxad, *rxad;
1524 	int xflag;
1525 	s64 nxoff, xoff;
1526 	int nxlen, xlen, lxlen, rxlen;
1527 	s64 nxaddr, xaddr;
1528 	struct tlock *tlck;
1529 	struct xtlock *xtlck = NULL;
1530 	int newpage = 0;
1531 
1532 	/* there must exist extent to be tailgated */
1533 	nxoff = offsetXAD(nxad);
1534 	nxlen = lengthXAD(nxad);
1535 	nxaddr = addressXAD(nxad);
1536 
1537 	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1538 		return rc;
1539 
1540 	/* retrieve search result */
1541 	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1542 
1543 	if (cmp != 0) {
1544 		XT_PUTPAGE(mp);
1545 		jfs_error(ip->i_sb, "Could not find extent\n");
1546 		return -EIO;
1547 	}
1548 
1549 	BT_MARK_DIRTY(mp, ip);
1550 	/*
1551 	 * acquire tlock of the leaf page containing original entry
1552 	 */
1553 	if (!test_cflag(COMMIT_Nolink, ip)) {
1554 		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1555 		xtlck = (struct xtlock *) & tlck->lock;
1556 	}
1557 
1558 	xad = &p->xad[index0];
1559 	xflag = xad->flag;
1560 	xoff = offsetXAD(xad);
1561 	xlen = lengthXAD(xad);
1562 	xaddr = addressXAD(xad);
1563 
1564 	/* nXAD must be completely contained within XAD */
1565 	if ((xoff > nxoff) ||
1566 	    (nxoff + nxlen > xoff + xlen)) {
1567 		XT_PUTPAGE(mp);
1568 		jfs_error(ip->i_sb,
1569 			  "nXAD in not completely contained within XAD\n");
1570 		return -EIO;
1571 	}
1572 
1573 	index = index0;
1574 	newindex = index + 1;
1575 	nextindex = le16_to_cpu(p->header.nextindex);
1576 
1577 	if (xoff < nxoff)
1578 		goto coalesceRight;
1579 
1580 	/*
1581 	 * coalesce with left XAD
1582 	 */
1583 	/* is XAD first entry of page ? */
1584 	if (index == XTENTRYSTART)
1585 		goto replace;
1586 
1587 	/* is nXAD logically and physically contiguous with lXAD ? */
1588 	lxad = &p->xad[index - 1];
1589 	lxlen = lengthXAD(lxad);
1590 	if (!(lxad->flag & XAD_NOTRECORDED) &&
1591 	    (nxoff == offsetXAD(lxad) + lxlen) &&
1592 	    (nxaddr == addressXAD(lxad) + lxlen) &&
1593 	    (lxlen + nxlen < MAXXLEN)) {
1594 		/* extend right lXAD */
1595 		index0 = index - 1;
1596 		XADlength(lxad, lxlen + nxlen);
1597 
1598 		/* If we just merged two extents together, need to make sure the
1599 		 * right extent gets logged.  If the left one is marked XAD_NEW,
1600 		 * then we know it will be logged.  Otherwise, mark as
1601 		 * XAD_EXTENDED
1602 		 */
1603 		if (!(lxad->flag & XAD_NEW))
1604 			lxad->flag |= XAD_EXTENDED;
1605 
1606 		if (xlen > nxlen) {
1607 			/* truncate XAD */
1608 			XADoffset(xad, xoff + nxlen);
1609 			XADlength(xad, xlen - nxlen);
1610 			XADaddress(xad, xaddr + nxlen);
1611 			goto out;
1612 		} else {	/* (xlen == nxlen) */
1613 
1614 			/* remove XAD */
1615 			if (index < nextindex - 1)
1616 				memmove(&p->xad[index], &p->xad[index + 1],
1617 					(nextindex - index -
1618 					 1) << L2XTSLOTSIZE);
1619 
1620 			p->header.nextindex =
1621 			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1622 					1);
1623 
1624 			index = index0;
1625 			newindex = index + 1;
1626 			nextindex = le16_to_cpu(p->header.nextindex);
1627 			xoff = nxoff = offsetXAD(lxad);
1628 			xlen = nxlen = lxlen + nxlen;
1629 			xaddr = nxaddr = addressXAD(lxad);
1630 			goto coalesceRight;
1631 		}
1632 	}
1633 
1634 	/*
1635 	 * replace XAD with nXAD
1636 	 */
1637       replace:			/* (nxoff == xoff) */
1638 	if (nxlen == xlen) {
1639 		/* replace XAD with nXAD:recorded */
1640 		*xad = *nxad;
1641 		xad->flag = xflag & ~XAD_NOTRECORDED;
1642 
1643 		goto coalesceRight;
1644 	} else			/* (nxlen < xlen) */
1645 		goto updateLeft;
1646 
1647 	/*
1648 	 * coalesce with right XAD
1649 	 */
1650       coalesceRight:		/* (xoff <= nxoff) */
1651 	/* is XAD last entry of page ? */
1652 	if (newindex == nextindex) {
1653 		if (xoff == nxoff)
1654 			goto out;
1655 		goto updateRight;
1656 	}
1657 
1658 	/* is nXAD logically and physically contiguous with rXAD ? */
1659 	rxad = &p->xad[index + 1];
1660 	rxlen = lengthXAD(rxad);
1661 	if (!(rxad->flag & XAD_NOTRECORDED) &&
1662 	    (nxoff + nxlen == offsetXAD(rxad)) &&
1663 	    (nxaddr + nxlen == addressXAD(rxad)) &&
1664 	    (rxlen + nxlen < MAXXLEN)) {
1665 		/* extend left rXAD */
1666 		XADoffset(rxad, nxoff);
1667 		XADlength(rxad, rxlen + nxlen);
1668 		XADaddress(rxad, nxaddr);
1669 
1670 		/* If we just merged two extents together, need to make sure
1671 		 * the left extent gets logged.  If the right one is marked
1672 		 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
1673 		 * XAD_EXTENDED
1674 		 */
1675 		if (!(rxad->flag & XAD_NEW))
1676 			rxad->flag |= XAD_EXTENDED;
1677 
1678 		if (xlen > nxlen)
1679 			/* truncate XAD */
1680 			XADlength(xad, xlen - nxlen);
1681 		else {		/* (xlen == nxlen) */
1682 
1683 			/* remove XAD */
1684 			memmove(&p->xad[index], &p->xad[index + 1],
1685 				(nextindex - index - 1) << L2XTSLOTSIZE);
1686 
1687 			p->header.nextindex =
1688 			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
1689 					1);
1690 		}
1691 
1692 		goto out;
1693 	} else if (xoff == nxoff)
1694 		goto out;
1695 
1696 	if (xoff >= nxoff) {
1697 		XT_PUTPAGE(mp);
1698 		jfs_error(ip->i_sb, "xoff >= nxoff\n");
1699 		return -EIO;
1700 	}
1701 
1702 	/*
1703 	 * split XAD into (lXAD, nXAD):
1704 	 *
1705 	 *          |---nXAD--->
1706 	 * --|----------XAD----------|--
1707 	 *   |-lXAD-|
1708 	 */
1709       updateRight:		/* (xoff < nxoff) */
1710 	/* truncate old XAD as lXAD:not_recorded */
1711 	xad = &p->xad[index];
1712 	XADlength(xad, nxoff - xoff);
1713 
1714 	/* insert nXAD:recorded */
1715 	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1716 
1717 		/* xtSpliUp() unpins leaf pages */
1718 		split.mp = mp;
1719 		split.index = newindex;
1720 		split.flag = xflag & ~XAD_NOTRECORDED;
1721 		split.off = nxoff;
1722 		split.len = nxlen;
1723 		split.addr = nxaddr;
1724 		split.pxdlist = NULL;
1725 		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1726 			return rc;
1727 
1728 		/* get back old page */
1729 		p = xt_getpage(ip, bn, &mp);
1730 		if (IS_ERR(p))
1731 			return PTR_ERR(p);
1732 		/*
1733 		 * if leaf root has been split, original root has been
1734 		 * copied to new child page, i.e., original entry now
1735 		 * resides on the new child page;
1736 		 */
1737 		if (p->header.flag & BT_INTERNAL) {
1738 			ASSERT(p->header.nextindex ==
1739 			       cpu_to_le16(XTENTRYSTART + 1));
1740 			xad = &p->xad[XTENTRYSTART];
1741 			bn = addressXAD(xad);
1742 			XT_PUTPAGE(mp);
1743 
1744 			/* get new child page */
1745 			p = xt_getpage(ip, bn, &mp);
1746 			if (IS_ERR(p))
1747 				return PTR_ERR(p);
1748 
1749 			BT_MARK_DIRTY(mp, ip);
1750 			if (!test_cflag(COMMIT_Nolink, ip)) {
1751 				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1752 				xtlck = (struct xtlock *) & tlck->lock;
1753 			}
1754 		} else {
1755 			/* is nXAD on new page ? */
1756 			if (newindex >
1757 			    (le16_to_cpu(p->header.maxentry) >> 1)) {
1758 				newindex =
1759 				    newindex -
1760 				    le16_to_cpu(p->header.nextindex) +
1761 				    XTENTRYSTART;
1762 				newpage = 1;
1763 			}
1764 		}
1765 	} else {
1766 		/* if insert into middle, shift right remaining entries */
1767 		if (newindex < nextindex)
1768 			memmove(&p->xad[newindex + 1], &p->xad[newindex],
1769 				(nextindex - newindex) << L2XTSLOTSIZE);
1770 
1771 		/* insert the entry */
1772 		xad = &p->xad[newindex];
1773 		*xad = *nxad;
1774 		xad->flag = xflag & ~XAD_NOTRECORDED;
1775 
1776 		/* advance next available entry index. */
1777 		p->header.nextindex =
1778 		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1779 	}
1780 
1781 	/*
1782 	 * does nXAD force 3-way split ?
1783 	 *
1784 	 *          |---nXAD--->|
1785 	 * --|----------XAD-------------|--
1786 	 *   |-lXAD-|           |-rXAD -|
1787 	 */
1788 	if (nxoff + nxlen == xoff + xlen)
1789 		goto out;
1790 
1791 	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
1792 	if (newpage) {
1793 		/* close out old page */
1794 		if (!test_cflag(COMMIT_Nolink, ip)) {
1795 			xtlck->lwm.offset = (xtlck->lwm.offset) ?
1796 			    min(index0, (int)xtlck->lwm.offset) : index0;
1797 			xtlck->lwm.length =
1798 			    le16_to_cpu(p->header.nextindex) -
1799 			    xtlck->lwm.offset;
1800 		}
1801 
1802 		bn = le64_to_cpu(p->header.next);
1803 		XT_PUTPAGE(mp);
1804 
1805 		/* get new right page */
1806 		p = xt_getpage(ip, bn, &mp);
1807 		if (IS_ERR(p))
1808 			return PTR_ERR(p);
1809 
1810 		BT_MARK_DIRTY(mp, ip);
1811 		if (!test_cflag(COMMIT_Nolink, ip)) {
1812 			tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
1813 			xtlck = (struct xtlock *) & tlck->lock;
1814 		}
1815 
1816 		index0 = index = newindex;
1817 	} else
1818 		index++;
1819 
1820 	newindex = index + 1;
1821 	nextindex = le16_to_cpu(p->header.nextindex);
1822 	xlen = xlen - (nxoff - xoff);
1823 	xoff = nxoff;
1824 	xaddr = nxaddr;
1825 
1826 	/* recompute split pages */
1827 	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1828 		XT_PUTPAGE(mp);
1829 
1830 		if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1831 			return rc;
1832 
1833 		/* retrieve search result */
1834 		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
1835 
1836 		if (cmp != 0) {
1837 			XT_PUTPAGE(mp);
1838 			jfs_error(ip->i_sb, "xtSearch failed\n");
1839 			return -EIO;
1840 		}
1841 
1842 		if (index0 != index) {
1843 			XT_PUTPAGE(mp);
1844 			jfs_error(ip->i_sb, "unexpected value of index\n");
1845 			return -EIO;
1846 		}
1847 	}
1848 
1849 	/*
1850 	 * split XAD into (nXAD, rXAD)
1851 	 *
1852 	 *          ---nXAD---|
1853 	 * --|----------XAD----------|--
1854 	 *                    |-rXAD-|
1855 	 */
1856       updateLeft:		/* (nxoff == xoff) && (nxlen < xlen) */
1857 	/* update old XAD with nXAD:recorded */
1858 	xad = &p->xad[index];
1859 	*xad = *nxad;
1860 	xad->flag = xflag & ~XAD_NOTRECORDED;
1861 
1862 	/* insert rXAD:not_recorded */
1863 	xoff = xoff + nxlen;
1864 	xlen = xlen - nxlen;
1865 	xaddr = xaddr + nxlen;
1866 	if (nextindex == le16_to_cpu(p->header.maxentry)) {
1867 /*
1868 printf("xtUpdate.updateLeft.split p:0x%p\n", p);
1869 */
1870 		/* xtSpliUp() unpins leaf pages */
1871 		split.mp = mp;
1872 		split.index = newindex;
1873 		split.flag = xflag;
1874 		split.off = xoff;
1875 		split.len = xlen;
1876 		split.addr = xaddr;
1877 		split.pxdlist = NULL;
1878 		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
1879 			return rc;
1880 
1881 		/* get back old page */
1882 		p = xt_getpage(ip, bn, &mp);
1883 		if (IS_ERR(p))
1884 			return PTR_ERR(p);
1885 
1886 		/*
1887 		 * if leaf root has been split, original root has been
1888 		 * copied to new child page, i.e., original entry now
1889 		 * resides on the new child page;
1890 		 */
1891 		if (p->header.flag & BT_INTERNAL) {
1892 			ASSERT(p->header.nextindex ==
1893 			       cpu_to_le16(XTENTRYSTART + 1));
1894 			xad = &p->xad[XTENTRYSTART];
1895 			bn = addressXAD(xad);
1896 			XT_PUTPAGE(mp);
1897 
1898 			/* get new child page */
1899 			p = xt_getpage(ip, bn, &mp);
1900 			if (IS_ERR(p))
1901 				return PTR_ERR(p);
1902 
1903 			BT_MARK_DIRTY(mp, ip);
1904 			if (!test_cflag(COMMIT_Nolink, ip)) {
1905 				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
1906 				xtlck = (struct xtlock *) & tlck->lock;
1907 			}
1908 		}
1909 	} else {
1910 		/* if insert into middle, shift right remaining entries */
1911 		if (newindex < nextindex)
1912 			memmove(&p->xad[newindex + 1], &p->xad[newindex],
1913 				(nextindex - newindex) << L2XTSLOTSIZE);
1914 
1915 		/* insert the entry */
1916 		xad = &p->xad[newindex];
1917 		XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
1918 
1919 		/* advance next available entry index. */
1920 		p->header.nextindex =
1921 		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
1922 	}
1923 
1924       out:
1925 	if (!test_cflag(COMMIT_Nolink, ip)) {
1926 		xtlck->lwm.offset = (xtlck->lwm.offset) ?
1927 		    min(index0, (int)xtlck->lwm.offset) : index0;
1928 		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
1929 		    xtlck->lwm.offset;
1930 	}
1931 
1932 	/* unpin the leaf page */
1933 	XT_PUTPAGE(mp);
1934 
1935 	return rc;
1936 }
1937 
1938 
1939 /*
1940  *	xtAppend()
1941  *
1942  * function: grow in append mode from contiguous region specified ;
1943  *
1944  * parameter:
1945  *	tid		- transaction id;
1946  *	ip		- file object;
1947  *	xflag		- extent flag:
1948  *	xoff		- extent offset;
1949  *	maxblocks	- max extent length;
1950  *	xlen		- extent length (in/out);
1951  *	xaddrp		- extent address pointer (in/out):
1952  *	flag		-
1953  *
1954  * return:
1955  */
int xtAppend(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
	     s32 * xlenp,	/* (in/out) */
	     s64 * xaddrp,	/* (in/out) */
	     int flag)
{
	int rc = 0;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn, xaddr;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int nsplit, nblocks, xlen;
	struct pxdlist pxdlist;
	pxd_t *pxd;
	s64 next;

	xaddr = *xaddrp;
	xlen = *xlenp;
	jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);

	/*
	 *	search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both return (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* an extent already covering xoff is a caller error */
	if (cmp == 0) {
		rc = -EEXIST;
		goto out;
	}

	/* clamp length so the new extent cannot overlap the next one */
	if (next)
		xlen = min(xlen, (int)(next - xoff));
//insert:
	/*
	 *	insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *	if the leaf page is full, split the page and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex < le16_to_cpu(p->header.maxentry))
		goto insertLeaf;

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * index pages are carved off the front of the contiguous
	 * region [xaddr, xaddr + maxblocks), one page worth per split.
	 */
	nsplit = btstack.nsplit;
	split.pxdlist = &pxdlist;
	pxdlist.maxnpxd = pxdlist.npxd = 0;
	pxd = &pxdlist.pxd[0];
	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
			PXDaddress(pxd, xaddr);
			PXDlength(pxd, nblocks);

			pxdlist.maxnpxd++;

			continue;
		}

		/* undo allocation */
		/* NOTE(review): blocks allocated by earlier loop
		 * iterations do not appear to be freed here — confirm
		 * whether they are reclaimed elsewhere on this path
		 */

		goto out;
	}

	/* the data extent shrinks by whatever the index pages consumed */
	xlen = min(xlen, maxblocks);

	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	split.mp = mp;
	split.index = index;
	split.flag = xflag;
	split.off = xoff;
	split.len = xlen;
	split.addr = xaddr;
	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
		/* undo data extent allocation */
		dbFree(ip, *xaddrp, (s64) * xlenp);

		return rc;
	}

	/* report back where/how much was actually allocated */
	*xaddrp = xaddr;
	*xlenp = xlen;
	return 0;

	/*
	 *	insert the new entry into the leaf page
	 */
      insertLeaf:
	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) & tlck->lock;

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* widen the logged write-range to cover the new entry */
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
	    xtlck->lwm.offset;

	*xaddrp = xaddr;
	*xlenp = xlen;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
2108 
2109 /*
2110  *	xtInitRoot()
2111  *
2112  * initialize file root (inline in inode)
2113  */
2114 void xtInitRoot(tid_t tid, struct inode *ip)
2115 {
2116 	xtroot_t *p;
2117 
2118 	/*
2119 	 * acquire a transaction lock on the root
2120 	 *
2121 	 * action:
2122 	 */
2123 	txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
2124 		      tlckXTREE | tlckNEW);
2125 	p = &JFS_IP(ip)->i_xtroot;
2126 
2127 	p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
2128 	p->header.nextindex = cpu_to_le16(XTENTRYSTART);
2129 
2130 	if (S_ISDIR(ip->i_mode))
2131 		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
2132 	else {
2133 		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
2134 		ip->i_size = 0;
2135 	}
2136 
2137 
2138 	return;
2139 }
2140 
2141 
2142 /*
2143  * We can run into a deadlock truncating a file with a large number of
2144  * xtree pages (large fragmented file).  A robust fix would entail a
2145  * reservation system where we would reserve a number of metadata pages
2146  * and tlocks which we would be guaranteed without a deadlock.  Without
2147  * this, a partial fix is to limit number of metadata pages we will lock
2148  * in a single transaction.  Currently we will truncate the file so that
2149  * no more than 50 leaf pages will be locked.  The caller of xtTruncate
2150  * will be responsible for ensuring that the current transaction gets
2151  * committed, and that subsequent transactions are created to truncate
2152  * the file further if needed.
2153  */
2154 #define MAX_TRUNCATE_LEAVES 50
2155 
2156 /*
2157  *	xtTruncate()
2158  *
2159  * function:
2160  *	traverse for truncation logging backward bottom up;
2161  *	terminate at the last extent entry at the current subtree
2162  *	root page covering new down size.
2163  *	truncation may occur within the last extent entry.
2164  *
2165  * parameter:
2166  *	int		tid,
2167  *	struct inode	*ip,
2168  *	s64		newsize,
2169  *	int		type)	{PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
2170  *
2171  * return:
2172  *
2173  * note:
2174  *	PWMAP:
2175  *	 1. truncate (non-COMMIT_NOLINK file)
2176  *	    by jfs_truncate() or jfs_open(O_TRUNC):
2177  *	    xtree is updated;
2178  *	 2. truncate index table of directory when last entry removed
2179  *	map update via tlock at commit time;
2180  *	PMAP:
2181  *	 Call xtTruncate_pmap instead
2182  *	WMAP:
2183  *	 1. remove (free zero link count) on last reference release
2184  *	    (pmap has been freed at commit zero link count);
2185  *	 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
2186  *	    xtree is updated;
2187  *	 map update directly at truncation time;
2188  *
2189  *	if (DELETE)
2190  *		no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
2191  *	else if (TRUNCATE)
2192  *		must write LOG_NOREDOPAGE for deleted index page;
2193  *
2194  * pages may already have been tlocked by anonymous transactions
2195  * during file growth (i.e., write) before truncation;
2196  *
2197  * except last truncated entry, deleted entries remains as is
2198  * in the page (nextindex is updated) for other use
2199  * (e.g., log/update allocation map): this avoid copying the page
2200  * info but delay free of pages;
2201  *
2202  */
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
{
	s64 teof;		/* first fs block NOT retained by the new size */
	struct metapage *mp;
	xtpage_t *p;
	s64 bn;
	int index, nextindex;
	xad_t *xad;
	s64 xoff, xaddr;
	int xlen, len, freexlen;
	struct btstack btstack;
	struct btframe *parent;
	struct tblock *tblk = NULL;
	struct tlock *tlck = NULL;
	struct xtlock *xtlck = NULL;
	struct xdlistlock xadlock;	/* maplock for COMMIT_WMAP */
	struct pxd_lock *pxdlock;		/* maplock for COMMIT_WMAP */
	s64 nfreed;		/* running count of freed fs blocks (for quota) */
	int freed, log;
	int locked_leaves = 0;	/* leaves tlock'ed in this tx; bounded below */

	/* save object truncation type */
	if (tid) {
		tblk = tid_to_tblock(tid);
		tblk->xflag |= flag;
	}

	nfreed = 0;

	flag &= COMMIT_MAP;
	assert(flag != COMMIT_PMAP);	/* PMAP callers use xtTruncate_pmap() */

	if (flag == COMMIT_PWMAP)
		log = 1;	/* map updated via tlocks at commit time */
	else {
		/* COMMIT_WMAP: working map updated directly, no logging */
		log = 0;
		xadlock.flag = mlckFREEXADLIST;
		xadlock.index = 1;
	}

	/*
	 * if the newsize is not an integral number of pages,
	 * the file between newsize and next page boundary will
	 * be cleared.
	 * if truncating into a file hole, it will cause
	 * a full block to be allocated for the logical block.
	 */

	/*
	 * release page blocks of truncated region <teof, eof>
	 *
	 * free the data blocks from the leaf index blocks.
	 * delete the parent index entries corresponding to
	 * the freed child data/index blocks.
	 * free the index blocks themselves which aren't needed
	 * in new sized file.
	 *
	 * index blocks are updated only if the blocks are to be
	 * retained in the new sized file.
	 * if type is PMAP, the data and index pages are NOT
	 * freed, and the data and index blocks are NOT freed
	 * from working map.
	 * (this will allow continued access of data/index of
	 * temporary file (zerolink count file truncated to zero-length)).
	 */
	/* round newsize (bytes) up to a whole number of fs blocks */
	teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/* clear stack */
	BT_CLR(&btstack);

	/*
	 * start with root
	 *
	 * root resides in the inode
	 */
	bn = 0;

	/*
	 * first access of each page:
	 */
      getPage:
	p = xt_getpage(ip, bn, &mp);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/* process entries backward from last index */
	index = le16_to_cpu(p->header.nextindex) - 1;


	/* Since this is the rightmost page at this level, and we may have
	 * already freed a page that was formerly to the right, let's make
	 * sure that the next pointer is zero.
	 */
	if (p->header.next) {
		if (log)
			/*
			 * Make sure this change to the header is logged.
			 * If we really truncate this leaf, the flag
			 * will be changed to tlckTRUNCATE
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
		BT_MARK_DIRTY(mp, ip);
		p->header.next = 0;
	}

	if (p->header.flag & BT_INTERNAL)
		goto getChild;

	/*
	 *	leaf page
	 */
	freed = 0;

	/* does region covered by leaf page precede Teof ?
	 * (last entry ends at or before teof => nothing to free here)
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	if (teof >= xoff + xlen) {
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/* (re)acquire tlock of the leaf page */
	if (log) {
		if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
			/*
			 * We need to limit the size of the transaction
			 * to avoid exhausting pagecache & tlocks
			 */
			XT_PUTPAGE(mp);
			/* return a partial-progress size; caller re-invokes */
			newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
			goto getParent;
		}
		tlck = txLock(tid, ip, mp, tlckXTREE);
		tlck->type = tlckXTREE | tlckTRUNCATE;
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
	}
	BT_MARK_DIRTY(mp, ip);

	/*
	 * scan backward leaf page entries
	 */
	for (; index >= XTENTRYSTART; index--) {
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		xaddr = addressXAD(xad);

		/*
		 * The "data" for a directory is indexed by the block
		 * device's address space.  This metadata must be invalidated
		 * here
		 */
		if (S_ISDIR(ip->i_mode) && (teof == 0))
			invalidate_xad_metapages(ip, *xad);
		/*
		 * entry beyond eof: continue scan of current page
		 *          xad
		 * ---|---=======------->
		 *   eof
		 */
		if (teof < xoff) {
			nfreed += xlen;
			continue;
		}

		/*
		 * (xoff <= teof): last entry to be deleted from page;
		 * If other entries remain in page: keep and update the page.
		 */

		/*
		 * eof == entry_start: delete the entry
		 *           xad
		 * -------|=======------->
		 *       eof
		 *
		 */
		if (teof == xoff) {
			nfreed += xlen;

			/* whole page freed: fall out of loop to free it */
			if (index == XTENTRYSTART)
				break;

			nextindex = index;
		}
		/*
		 * eof within the entry: truncate the entry.
		 *          xad
		 * -------===|===------->
		 *          eof
		 */
		else if (teof < xoff + xlen) {
			/* update truncated entry */
			len = teof - xoff;
			freexlen = xlen - len;
			XADlength(xad, len);

			/* save pxd of truncated extent in tlck */
			xaddr += len;
			if (log) {	/* COMMIT_PWMAP */
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(index, (int)xtlck->lwm.offset) : index;
				xtlck->lwm.length = index + 1 -
				    xtlck->lwm.offset;
				xtlck->twm.offset = index;
				pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
			}
			/* free truncated extent */
			else {	/* COMMIT_WMAP */

				pxdlock = (struct pxd_lock *) & xadlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
				txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);

				/* reset map lock */
				xadlock.flag = mlckFREEXADLIST;
			}

			/* current entry is new last entry; */
			nextindex = index + 1;

			nfreed += freexlen;
		}
		/*
		 * eof beyond the entry:
		 *          xad
		 * -------=======---|--->
		 *                 eof
		 */
		else {		/* (xoff + xlen < teof) */

			nextindex = index + 1;
		}

		/* shrink the page to [XTENTRYSTART, nextindex) */
		if (nextindex < le16_to_cpu(p->header.nextindex)) {
			if (!log) {	/* COMMIT_WMAP */
				xadlock.xdlist = &p->xad[nextindex];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    nextindex;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			p->header.nextindex = cpu_to_le16(nextindex);
		}

		XT_PUTPAGE(mp);

		/* leaf was only truncated, not freed; assert(freed == 0); */
		goto getParent;
	}			/* end scan of leaf page entries */

	freed = 1;

	/*
	 * leaf page become empty: free the page if type != PMAP
	 */
	if (log) {		/* COMMIT_PWMAP */
		/* txCommit() with tlckFREE:
		 * free data extents covered by leaf [XTENTRYSTART:hwm);
		 * invalidate leaf if COMMIT_PWMAP;
		 * if (TRUNCATE), will write LOG_NOREDOPAGE;
		 */
		tlck->type = tlckXTREE | tlckFREE;
	} else {		/* COMMIT_WMAP */

		/* free data extents covered by leaf */
		xadlock.xdlist = &p->xad[XTENTRYSTART];
		xadlock.count =
		    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
		txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
	}

	if (p->header.flag & BT_ROOT) {
		/* empty root: reset it to an empty leaf in the inode */
		p->header.flag &= ~BT_INTERNAL;
		p->header.flag |= BT_LEAF;
		p->header.nextindex = cpu_to_le16(XTENTRYSTART);

		XT_PUTPAGE(mp);	/* debug */
		goto out;
	} else {
		if (log) {	/* COMMIT_PWMAP */
			/* page will be invalidated at tx completion
			 */
			XT_PUTPAGE(mp);
		} else {	/* COMMIT_WMAP */

			if (mp->lid)
				lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;

			/* invalidate empty leaf page */
			discard_metapage(mp);
		}
	}

	/*
	 * the leaf page become empty: delete the parent entry
	 * for the leaf page if the parent page is to be kept
	 * in the new sized file.
	 */

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	p = xt_getpage(ip, bn, &mp);
	if (IS_ERR(p))
		return PTR_ERR(p);

	index = parent->index;

	/*
	 * child page was not empty:
	 */
	if (freed == 0) {
		/* has any entry deleted from parent ? */
		if (index < le16_to_cpu(p->header.nextindex) - 1) {
			/* (re)acquire tlock on the parent page */
			if (log) {	/* COMMIT_PWMAP */
				/* txCommit() with tlckTRUNCATE:
				 * free child extents covered by parent [);
				 */
				tlck = txLock(tid, ip, mp, tlckXTREE);
				xtlck = (struct xtlock *) & tlck->lock;
				if (!(tlck->type & tlckTRUNCATE)) {
					xtlck->hwm.offset =
					    le16_to_cpu(p->header.
							nextindex) - 1;
					tlck->type =
					    tlckXTREE | tlckTRUNCATE;
				}
			} else {	/* COMMIT_WMAP */

				/* free child extents covered by parent */
				xadlock.xdlist = &p->xad[index + 1];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    index - 1;
				txFreeMap(ip, (struct maplock *) & xadlock,
					  NULL, COMMIT_WMAP);
			}
			BT_MARK_DIRTY(mp, ip);

			p->header.nextindex = cpu_to_le16(index + 1);
		}
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/*
	 * child page was empty:
	 */
	nfreed += lengthXAD(&p->xad[index]);

	/*
	 * During working map update, child page's tlock must be handled
	 * before parent's.  This is because the parent's tlock will cause
	 * the child's disk space to be marked available in the wmap, so
	 * it's important that the child page be released by that time.
	 *
	 * ToDo:  tlocks should be on doubly-linked list, so we can
	 * quickly remove it and add it to the end.
	 */

	/*
	 * Move parent page's tlock to the end of the tid's tlock list
	 */
	if (log && mp->lid && (tblk->last != mp->lid) &&
	    lid_to_tlock(mp->lid)->tid) {
		lid_t lid = mp->lid;
		struct tlock *prev;

		tlck = lid_to_tlock(lid);

		/* unlink lid from its current position in the list */
		if (tblk->next == lid)
			tblk->next = tlck->next;
		else {
			for (prev = lid_to_tlock(tblk->next);
			     prev->next != lid;
			     prev = lid_to_tlock(prev->next)) {
				assert(prev->next);
			}
			prev->next = tlck->next;
		}
		/* append lid at the tail so it is processed last */
		lid_to_tlock(tblk->last)->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}

	/*
	 * parent page become empty: free the page
	 */
	if (index == XTENTRYSTART) {
		if (log) {	/* COMMIT_PWMAP */
			/* txCommit() with tlckFREE:
			 * free child extents covered by parent;
			 * invalidate parent if COMMIT_PWMAP;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->hwm.offset =
			    le16_to_cpu(p->header.nextindex) - 1;
			tlck->type = tlckXTREE | tlckFREE;
		} else {	/* COMMIT_WMAP */

			/* free child extents covered by parent */
			xadlock.xdlist = &p->xad[XTENTRYSTART];
			xadlock.count =
			    le16_to_cpu(p->header.nextindex) -
			    XTENTRYSTART;
			txFreeMap(ip, (struct maplock *) & xadlock, NULL,
				  COMMIT_WMAP);
		}
		BT_MARK_DIRTY(mp, ip);

		if (p->header.flag & BT_ROOT) {
			/* empty root: reset to empty in-inode leaf */
			p->header.flag &= ~BT_INTERNAL;
			p->header.flag |= BT_LEAF;
			p->header.nextindex = cpu_to_le16(XTENTRYSTART);
			if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
				/*
				 * Shrink root down to allow inline
				 * EA (otherwise fsck complains)
				 */
				p->header.maxentry =
				    cpu_to_le16(XTROOTINITSLOT);
				JFS_IP(ip)->mode2 |= INLINEEA;
			}

			XT_PUTPAGE(mp);	/* debug */
			goto out;
		} else {
			if (log) {	/* COMMIT_PWMAP */
				/* page will be invalidated at tx completion
				 */
				XT_PUTPAGE(mp);
			} else {	/* COMMIT_WMAP */

				if (mp->lid)
					lid_to_tlock(mp->lid)->flag |=
						tlckFREELOCK;

				/* invalidate parent page */
				discard_metapage(mp);
			}

			/* parent has become empty and freed:
			 * go back up to its parent page
			 */
			/* freed = 1; */
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else {
		/* try truncate region covered by preceding entry
		 * (process backward)
		 */
		index--;

		/* go back down to the child page corresponding
		 * to the entry
		 */
		goto getChild;
	}

	/*
	 *	internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	if (BT_STACK_FULL(&btstack)) {
		jfs_error(ip->i_sb, "stack overrun!\n");
		XT_PUTPAGE(mp);
		return -EIO;
	}
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:
	/*
	 * update file resource stat
	 */
	/* set size
	 */
	if (S_ISDIR(ip->i_mode) && !newsize)
		ip->i_size = 1;	/* fsck hates zero-length directories */
	else
		ip->i_size = newsize;

	/* update quota allocation to reflect freed blocks */
	dquot_free_block(ip, nfreed);

	/*
	 * free tlock of invalidated pages
	 */
	if (flag == COMMIT_WMAP)
		txFreelock(ip);

	return newsize;
}
2733 
2734 
2735 /*
2736  *	xtTruncate_pmap()
2737  *
2738  * function:
2739  *	Perform truncate to zero length for deleted file, leaving the
2740  *	xtree and working map untouched.  This allows the file to
2741  *	be accessed via open file handles, while the delete of the file
2742  *	is committed to disk.
2743  *
2744  * parameter:
2745  *	tid_t		tid,
2746  *	struct inode	*ip,
2747  *	s64		committed_size)
2748  *
2749  * return: new committed size
2750  *
2751  * note:
2752  *
2753  *	To avoid deadlock by holding too many transaction locks, the
2754  *	truncation may be broken up into multiple transactions.
 *	The committed_size keeps track of how much of the file has been
 *	freed from the pmaps.
2757  */
2758 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
2759 {
2760 	s64 bn;
2761 	struct btstack btstack;
2762 	int cmp;
2763 	int index;
2764 	int locked_leaves = 0;
2765 	struct metapage *mp;
2766 	xtpage_t *p;
2767 	struct btframe *parent;
2768 	int rc;
2769 	struct tblock *tblk;
2770 	struct tlock *tlck = NULL;
2771 	xad_t *xad;
2772 	int xlen;
2773 	s64 xoff;
2774 	struct xtlock *xtlck = NULL;
2775 
2776 	/* save object truncation type */
2777 	tblk = tid_to_tblock(tid);
2778 	tblk->xflag |= COMMIT_PMAP;
2779 
2780 	/* clear stack */
2781 	BT_CLR(&btstack);
2782 
2783 	if (committed_size) {
2784 		xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
2785 		rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2786 		if (rc)
2787 			return rc;
2788 
2789 		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
2790 
2791 		if (cmp != 0) {
2792 			XT_PUTPAGE(mp);
2793 			jfs_error(ip->i_sb, "did not find extent\n");
2794 			return -EIO;
2795 		}
2796 	} else {
2797 		/*
2798 		 * start with root
2799 		 *
2800 		 * root resides in the inode
2801 		 */
2802 		bn = 0;
2803 
2804 		/*
2805 		 * first access of each page:
2806 		 */
2807       getPage:
2808 		p = xt_getpage(ip, bn, &mp);
2809 		if (IS_ERR(p))
2810 			return PTR_ERR(p);
2811 
2812 		/* process entries backward from last index */
2813 		index = le16_to_cpu(p->header.nextindex) - 1;
2814 
2815 		if (p->header.flag & BT_INTERNAL)
2816 			goto getChild;
2817 	}
2818 
2819 	/*
2820 	 *	leaf page
2821 	 */
2822 
2823 	if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
2824 		/*
2825 		 * We need to limit the size of the transaction
2826 		 * to avoid exhausting pagecache & tlocks
2827 		 */
2828 		xad = &p->xad[index];
2829 		xoff = offsetXAD(xad);
2830 		xlen = lengthXAD(xad);
2831 		XT_PUTPAGE(mp);
2832 		return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
2833 	}
2834 	tlck = txLock(tid, ip, mp, tlckXTREE);
2835 	tlck->type = tlckXTREE | tlckFREE;
2836 	xtlck = (struct xtlock *) & tlck->lock;
2837 	xtlck->hwm.offset = index;
2838 
2839 
2840 	XT_PUTPAGE(mp);
2841 
2842 	/*
2843 	 * go back up to the parent page
2844 	 */
2845       getParent:
2846 	/* pop/restore parent entry for the current child page */
2847 	if ((parent = BT_POP(&btstack)) == NULL)
2848 		/* current page must have been root */
2849 		goto out;
2850 
2851 	/* get back the parent page */
2852 	bn = parent->bn;
2853 	p = xt_getpage(ip, bn, &mp);
2854 	if (IS_ERR(p))
2855 		return PTR_ERR(p);
2856 
2857 	index = parent->index;
2858 
2859 	/*
2860 	 * parent page become empty: free the page
2861 	 */
2862 	if (index == XTENTRYSTART) {
2863 		/* txCommit() with tlckFREE:
2864 		 * free child extents covered by parent;
2865 		 * invalidate parent if COMMIT_PWMAP;
2866 		 */
2867 		tlck = txLock(tid, ip, mp, tlckXTREE);
2868 		xtlck = (struct xtlock *) & tlck->lock;
2869 		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
2870 		tlck->type = tlckXTREE | tlckFREE;
2871 
2872 		XT_PUTPAGE(mp);
2873 
2874 		if (p->header.flag & BT_ROOT) {
2875 
2876 			goto out;
2877 		} else {
2878 			goto getParent;
2879 		}
2880 	}
2881 	/*
2882 	 * parent page still has entries for front region;
2883 	 */
2884 	else
2885 		index--;
2886 	/*
2887 	 *	internal page: go down to child page of current entry
2888 	 */
2889       getChild:
2890 	/* save current parent entry for the child page */
2891 	if (BT_STACK_FULL(&btstack)) {
2892 		jfs_error(ip->i_sb, "stack overrun!\n");
2893 		XT_PUTPAGE(mp);
2894 		return -EIO;
2895 	}
2896 	BT_PUSH(&btstack, bn, index);
2897 
2898 	/* get child page */
2899 	xad = &p->xad[index];
2900 	bn = addressXAD(xad);
2901 
2902 	/*
2903 	 * first access of each internal entry:
2904 	 */
2905 	/* release parent page */
2906 	XT_PUTPAGE(mp);
2907 
2908 	/* process the child page */
2909 	goto getPage;
2910 
2911       out:
2912 
2913 	return 0;
2914 }
2915 
2916 #ifdef CONFIG_JFS_STATISTICS
2917 int jfs_xtstat_proc_show(struct seq_file *m, void *v)
2918 {
2919 	seq_printf(m,
2920 		       "JFS Xtree statistics\n"
2921 		       "====================\n"
2922 		       "searches = %d\n"
2923 		       "fast searches = %d\n"
2924 		       "splits = %d\n",
2925 		       xtStat.search,
2926 		       xtStat.fastSearch,
2927 		       xtStat.split);
2928 	return 0;
2929 }
2930 #endif
2931