xref: /titanic_41/usr/src/lib/watchmalloc/common/malloc.c (revision 2917a9c9c3eee6fcaedb239f5f68da01f4ed0da9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  *	Memory management: malloc(), realloc(), free(), memalign().
34  *
35  *	The following #-parameters may be redefined:
36  *	GETCORE: a function to get more core memory.
37  *		GETCORE(0) is assumed to return the next available
38  *		address. Default is 'sbrk'.
39  *	ERRCORE: the error code as returned by GETCORE.
40  *		Default is ((char *)(-1)).
41  *	CORESIZE: a desired unit (measured in bytes) to be used
42  *		with GETCORE. Default is (1024*ALIGN).
43  *
44  *	This algorithm is based on a best fit strategy with lists of
45  *	free elts maintained in a self-adjusting binary tree. Each list
46  *	contains all elts of the same size. The tree is ordered by size.
47  *	For results on self-adjusting trees, see the paper:
48  *		Self-Adjusting Binary Trees,
49  *		DD Sleator & RE Tarjan, JACM 1985.
50  *
51  *	The header of a block contains the size of the data part in bytes.
52  *	Since the size of a block is 0%4, the low two bits of the header
53  *	are free and used as follows:
54  *
55  *		BIT0:	1 for busy (block is in use), 0 for free.
56  *		BIT1:	if the block is busy, this bit is 1 if the
57  *			preceding block in contiguous memory is free.
58  *			Otherwise, it is always 0.
59  */
60 
61 #include <c_synonyms.h>
62 #include "mallint.h"
63 
64 static	mutex_t	__watch_malloc_lock = DEFAULTMUTEX;
65 
66 static	TREE	*Root;		/* root of the free tree */
67 static	TREE	*Bottom;	/* the last free chunk in the arena */
68 static	char	*Baddr;		/* current high address of the arena */
69 
70 static	void	t_delete(TREE *);
71 static	void	t_splay(TREE *);
72 static	void	realfree(void *);
73 static	void	*malloc_unlocked(size_t);
74 static	void	free_unlocked(void *);
75 static	TREE	*morecore(size_t);
76 
77 static	void	protect(TREE *);
78 static	void	unprotect(TREE *);
79 
80 #define	FREEPAT	0
81 #define	LIVEPAT	1
82 
83 /*
84  * Patterns to be copied into freed blocks and allocated blocks.
85  * 0xfeedbeef and 0xfeedface are invalid pointer values in all programs.
86  */
87 static	uint64_t	patterns[2] = {
88 	0xdeadbeefdeadbeefULL,	/* pattern in a freed block */
89 	0xbaddcafebaddcafeULL	/* pattern in an allocated block */
90 };
91 
92 static void
93 copy_pattern(int pat, TREE *tp)
94 {
95 	uint64_t pattern = patterns[pat];
96 	size_t sz = SIZE(tp) / sizeof (uint64_t);
97 	/* LINTED improper alignment */
98 	uint64_t *datap = (uint64_t *)DATA(tp);
99 
100 	while (sz--)
101 		*datap++ = pattern;
102 }
103 
104 /*
105  * Keep lists of small blocks, LIFO order.
106  */
107 static	TREE	*List[MINSIZE/WORDSIZE-1];
108 static	TREE	*Last[MINSIZE/WORDSIZE-1];
109 
110 /* number of blocks to get at one time */
111 #define	NPS	(WORDSIZE*8)
112 
113 static void *
114 smalloc(size_t size)
115 {
116 	TREE	*tp;
117 	size_t	i;
118 
119 	ASSERT(size % WORDSIZE == 0);
120 	/* want to return a unique pointer on malloc(0) */
121 	if (size == 0)
122 		size = WORDSIZE;
123 
124 	/* list to use */
125 	i = size / WORDSIZE - 1;
126 
127 	if (List[i] == NULL) {
128 		TREE	*np;
129 		int	n;
130 		ASSERT((size + WORDSIZE) * NPS >= MINSIZE);
131 
132 		/* get NPS of these block types */
133 		if ((np = malloc_unlocked((size + WORDSIZE)*NPS)) == NULL)
134 			return (NULL);
135 
136 		/* make them into a link list */
137 		for (n = 0, List[i] = np; n < NPS; ++n) {
138 			tp = np;
139 			SIZE(tp) = size;
140 			copy_pattern(FREEPAT, tp);
141 			if (n == NPS - 1) {
142 				Last[i] = tp;
143 				np = NULL;
144 			} else {
145 				/* LINTED improper alignment */
146 				np = NEXT(tp);
147 			}
148 			AFTER(tp) = np;
149 			protect(tp);
150 		}
151 	}
152 
153 	/* allocate from the head of the queue */
154 	tp = List[i];
155 	unprotect(tp);
156 	if ((List[i] = AFTER(tp)) == NULL)
157 		Last[i] = NULL;
158 	copy_pattern(LIVEPAT, tp);
159 	SETBIT0(SIZE(tp));
160 	protect(tp);
161 	return (DATA(tp));
162 }
163 
164 void *
165 malloc(size_t size)
166 {
167 	void	*ret;
168 	(void) mutex_lock(&__watch_malloc_lock);
169 	ret = malloc_unlocked(size);
170 	(void) mutex_unlock(&__watch_malloc_lock);
171 	return (ret);
172 }
173 
174 static void *
175 malloc_unlocked(size_t size)
176 {
177 	size_t	n;
178 	TREE	*tp, *sp, *tmp;
179 
180 	COUNT(nmalloc);
181 	ASSERT(WORDSIZE == ALIGN);
182 
183 	/* check for size that could overflow calculations */
184 	if (size > MAX_MALLOC) {
185 		errno = ENOMEM;
186 		return (NULL);
187 	}
188 	/* make sure that size is 0 mod ALIGN */
189 	ROUND(size);
190 
191 	/* small blocks */
192 	if (size < MINSIZE)
193 		return (smalloc(size));
194 
195 	/* search for an elt of the right size */
196 	sp = NULL;
197 	n = 0;
198 	if (Root) {
199 		tp = Root;
200 		for (;;) {
201 			unprotect(tp);
202 			if (SIZE(tp) >= size) {	/* branch left */
203 				if (n == 0 || n >= SIZE(tp)) {
204 					sp = tp;
205 					n = SIZE(tp);
206 				}
207 				if ((tmp = LEFT(tp)) != NULL) {
208 					protect(tp);
209 					tp = tmp;
210 				} else {
211 					protect(tp);
212 					break;
213 				}
214 			} else {		/* branch right */
215 				if ((tmp = RIGHT(tp)) != NULL) {
216 					protect(tp);
217 					tp = tmp;
218 				} else {
219 					protect(tp);
220 					break;
221 				}
222 			}
223 		}
224 
225 		if (sp) {
226 			unprotect(sp);
227 			t_delete(sp);
228 		} else if (tp != Root) {
229 			/* make the searched-to element the root */
230 			unprotect(tp);
231 			t_splay(tp);
232 			protect(tp);
233 			Root = tp;
234 		}
235 	}
236 
237 	/* if found none fitted in the tree */
238 	if (sp == NULL) {
239 		if (Bottom) {
240 			unprotect(Bottom);
241 			if (size <= SIZE(Bottom)) {
242 				sp = Bottom;
243 				CLRBITS01(SIZE(sp));
244 			} else {
245 				protect(Bottom);
246 				if ((sp = morecore(size)) == NULL)
247 					return (NULL);
248 			}
249 		} else {
250 			if ((sp = morecore(size)) == NULL)
251 				return (NULL);
252 		}
253 	}
254 
255 	/* tell the forward neighbor that we're busy */
256 	/* LINTED improper alignment */
257 	tmp = NEXT(sp);
258 	unprotect(tmp);
259 	CLRBIT1(SIZE(tmp));
260 	ASSERT(ISBIT0(SIZE(tmp)));
261 	protect(tmp);
262 
263 leftover:
264 	/* if the leftover is enough for a new free piece */
265 	if ((n = (SIZE(sp) - size)) >= MINSIZE + WORDSIZE) {
266 		n -= WORDSIZE;
267 		SIZE(sp) = size;
268 		/* LINTED improper alignment */
269 		tp = NEXT(sp);
270 		SIZE(tp) = n | BIT0;
271 		realfree(DATA(tp));
272 	} else if (BOTTOM(sp))
273 		Bottom = NULL;
274 
275 	/* return the allocated space */
276 	copy_pattern(LIVEPAT, sp);
277 	SIZE(sp) |= BIT0;
278 	protect(sp);
279 	return (DATA(sp));
280 }
281 
282 /*
283  *	realloc().
284  *	If the block size is increasing, we try forward merging first.
285  *	This is not best-fit but it avoids some data recopying.
286  */
287 void *
288 realloc(void *old, size_t size)
289 {
290 	TREE	*tp, *np;
291 	size_t	ts;
292 	char	*new;
293 
294 	COUNT(nrealloc);
295 
296 	/* check for size that could overflow calculations */
297 	if (size > MAX_MALLOC) {
298 		errno = ENOMEM;
299 		return (NULL);
300 	}
301 
302 	/* pointer to the block */
303 	(void) mutex_lock(&__watch_malloc_lock);
304 	if (old == NULL) {
305 		new = malloc_unlocked(size);
306 		(void) mutex_unlock(&__watch_malloc_lock);
307 		return (new);
308 	}
309 
310 	/* make sure that size is 0 mod ALIGN */
311 	ROUND(size);
312 
313 	/* LINTED improper alignment */
314 	tp = BLOCK(old);
315 	unprotect(tp);
316 	ts = SIZE(tp);
317 
318 	/* if the block was freed, data has been destroyed. */
319 	if (!ISBIT0(ts)) {
320 		/* XXX; complain here! */
321 		protect(tp);
322 		(void) mutex_unlock(&__watch_malloc_lock);
323 		errno = EINVAL;
324 		return (NULL);
325 	}
326 
327 	CLRBITS01(SIZE(tp));
328 	if (size == SIZE(tp)) {	/* nothing to do */
329 		SIZE(tp) = ts;
330 		protect(tp);
331 		(void) mutex_unlock(&__watch_malloc_lock);
332 		return (old);
333 	}
334 
335 	/* special cases involving small blocks */
336 	if (size < MINSIZE || SIZE(tp) < MINSIZE) {
337 		if (size == 0) {
338 			SETOLD01(SIZE(tp), ts);
339 			free_unlocked(old);
340 			(void) mutex_unlock(&__watch_malloc_lock);
341 			return (NULL);
342 		}
343 		goto call_malloc;
344 	}
345 
346 	/* block is increasing in size, try merging the next block */
347 	if (size > SIZE(tp)) {
348 		/* LINTED improper alignment */
349 		np = NEXT(tp);
350 		unprotect(np);
351 		if (ISBIT0(SIZE(np)))
352 			protect(np);
353 		else {
354 			TREE *tmp;
355 			ASSERT(SIZE(np) >= MINSIZE);
356 			ASSERT(!ISBIT1(SIZE(np)));
357 			SIZE(tp) += SIZE(np) + WORDSIZE;
358 			if (np != Bottom)
359 				t_delete(np);
360 			else
361 				Bottom = NULL;
362 			/* LINTED improper alignment */
363 			tmp = NEXT(np);
364 			unprotect(tmp);
365 			CLRBIT1(SIZE(tmp));
366 			protect(tmp);
367 		}
368 
369 		/* not enough & at TRUE end of memory, try extending core */
370 		if (size > SIZE(tp) && BOTTOM(tp) && GETCORE(0) == Baddr) {
371 			Bottom = tp;
372 			protect(Bottom);
373 			if ((tp = morecore(size)) == NULL) {
374 				tp = Bottom;
375 				Bottom = NULL;
376 				unprotect(tp);
377 			}
378 		}
379 	}
380 
381 	/* got enough space to use */
382 	if (size <= SIZE(tp)) {
383 		size_t n;
384 chop_big:
385 		if ((n = (SIZE(tp) - size)) >= MINSIZE + WORDSIZE) {
386 			n -= WORDSIZE;
387 			SIZE(tp) = size;
388 			/* LINTED improper alignment */
389 			np = NEXT(tp);
390 			SIZE(np) = n | BIT0;
391 			realfree(DATA(np));
392 		} else if (BOTTOM(tp))
393 			Bottom = NULL;
394 
395 		/* the previous block may be free */
396 		SETOLD01(SIZE(tp), ts);
397 		protect(tp);
398 		(void) mutex_unlock(&__watch_malloc_lock);
399 		return (old);
400 	}
401 
402 call_malloc:	/* call malloc to get a new block */
403 	SETOLD01(SIZE(tp), ts);
404 	if ((new = malloc_unlocked(size)) != NULL) {
405 		CLRBITS01(ts);
406 		if (ts > size)
407 			ts = size;
408 		(void) memcpy(new, old, ts);
409 		free_unlocked(old);
410 		(void) mutex_unlock(&__watch_malloc_lock);
411 		return (new);
412 	}
413 
414 	/*
415 	 * Attempt special case recovery allocations since malloc() failed:
416 	 *
417 	 * 1. size <= SIZE(tp) < MINSIZE
418 	 *	Simply return the existing block
419 	 * 2. SIZE(tp) < size < MINSIZE
420 	 *	malloc() may have failed to allocate the chunk of
421 	 *	small blocks. Try asking for MINSIZE bytes.
422 	 * 3. size < MINSIZE <= SIZE(tp)
423 	 *	malloc() may have failed as with 2.  Change to
424 	 *	MINSIZE allocation which is taken from the beginning
425 	 *	of the current block.
426 	 * 4. MINSIZE <= SIZE(tp) < size
427 	 *	If the previous block is free and the combination of
428 	 *	these two blocks has at least size bytes, then merge
429 	 *	the two blocks copying the existing contents backwards.
430 	 */
431 	CLRBITS01(SIZE(tp));
432 	if (SIZE(tp) < MINSIZE) {
433 		if (size < SIZE(tp))		/* case 1. */ {
434 			SETOLD01(SIZE(tp), ts);
435 			protect(tp);
436 			(void) mutex_unlock(&__watch_malloc_lock);
437 			return (old);
438 		} else if (size < MINSIZE)	/* case 2. */ {
439 			size = MINSIZE;
440 			goto call_malloc;
441 		}
442 	} else if (size < MINSIZE)		/* case 3. */ {
443 		size = MINSIZE;
444 		goto chop_big;
445 	} else if (ISBIT1(ts)) {
446 		np = LAST(tp);
447 		unprotect(np);
448 		if ((SIZE(np) + SIZE(tp) + WORDSIZE) >= size) {
449 			ASSERT(!ISBIT0(SIZE(np)));
450 			t_delete(np);
451 			SIZE(np) += SIZE(tp) + WORDSIZE;
452 			/*
453 			 * Since the copy may overlap, use memmove().
454 			 */
455 			(void) memmove(DATA(np), old, SIZE(tp));
456 			old = DATA(np);
457 			tp = np;
458 			CLRBIT1(ts);
459 			goto chop_big;
460 		}
461 		protect(np);
462 	}
463 	SETOLD01(SIZE(tp), ts);
464 	protect(tp);
465 	(void) mutex_unlock(&__watch_malloc_lock);
466 	/* malloc() sets errno */
467 	return (NULL);
468 }
469 
470 /*
471  *	realfree().
472  *	Coalescing of adjacent free blocks is done first.
473  *	Then, the new free block is leaf-inserted into the free tree
474  *	without splaying. This strategy does not guarantee the amortized
475  *	O(nlogn) behaviour for the insert/delete/find set of operations
476  *	on the tree. In practice, however, free is much more infrequent
477  *	than malloc/realloc and the tree searches performed by these
478  *	functions adequately keep the tree in balance.
479  */
480 static void
481 realfree(void *old)
482 {
483 	TREE	*tp, *sp, *np, *tmp;
484 	size_t	ts, size;
485 
486 	COUNT(nfree);
487 
488 	/* pointer to the block */
489 	/* LINTED improper alignment */
490 	tp = BLOCK(old);
491 	unprotect(tp);
492 	ts = SIZE(tp);
493 	if (!ISBIT0(ts)) {	/* block is not busy; previously freed? */
494 		protect(tp);	/* force a watchpoint trap */
495 		CLRBIT0(SIZE(tp));
496 		return;
497 	}
498 	CLRBITS01(SIZE(tp));
499 	copy_pattern(FREEPAT, tp);
500 
501 	/* small block, return it to the tail of its queue */
502 	if (SIZE(tp) < MINSIZE) {
503 		ASSERT(SIZE(tp) / WORDSIZE >= 1);
504 		ts = SIZE(tp) / WORDSIZE - 1;
505 		AFTER(tp) = NULL;
506 		protect(tp);
507 		if (List[ts] == NULL) {
508 			List[ts] = tp;
509 			Last[ts] = tp;
510 		} else {
511 			sp = Last[ts];
512 			unprotect(sp);
513 			AFTER(sp) = tp;
514 			protect(sp);
515 			Last[ts] = tp;
516 		}
517 		return;
518 	}
519 
520 	/* see if coalescing with next block is warranted */
521 	/* LINTED improper alignment */
522 	np = NEXT(tp);
523 	unprotect(np);
524 	if (ISBIT0(SIZE(np)))
525 		protect(np);
526 	else {
527 		if (np != Bottom)
528 			t_delete(np);
529 		SIZE(tp) += SIZE(np) + WORDSIZE;
530 	}
531 
532 	/* the same with the preceding block */
533 	if (ISBIT1(ts)) {
534 		np = LAST(tp);
535 		unprotect(np);
536 		ASSERT(!ISBIT0(SIZE(np)));
537 		ASSERT(np != Bottom);
538 		t_delete(np);
539 		SIZE(np) += SIZE(tp) + WORDSIZE;
540 		tp = np;
541 	}
542 
543 	/* initialize tree info */
544 	PARENT(tp) = LEFT(tp) = RIGHT(tp) = LINKFOR(tp) = NULL;
545 
546 	/* set bottom block, or insert in the free tree */
547 	if (BOTTOM(tp))
548 		Bottom = tp;
549 	else {
550 		/* search for the place to insert */
551 		if (Root) {
552 			size = SIZE(tp);
553 			np = Root;
554 			for (;;) {
555 				unprotect(np);
556 				if (SIZE(np) > size) {
557 					if ((tmp = LEFT(np)) != NULL) {
558 						protect(np);
559 						np = tmp;
560 					} else {
561 						LEFT(np) = tp;
562 						PARENT(tp) = np;
563 						protect(np);
564 						break;
565 					}
566 				} else if (SIZE(np) < size) {
567 					if ((tmp = RIGHT(np)) != NULL) {
568 						protect(np);
569 						np = tmp;
570 					} else {
571 						RIGHT(np) = tp;
572 						PARENT(tp) = np;
573 						protect(np);
574 						break;
575 					}
576 				} else {
577 					if ((sp = PARENT(np)) != NULL) {
578 						unprotect(sp);
579 						if (np == LEFT(sp))
580 							LEFT(sp) = tp;
581 						else
582 							RIGHT(sp) = tp;
583 						PARENT(tp) = sp;
584 						protect(sp);
585 					} else
586 						Root = tp;
587 
588 					/* insert to head of list */
589 					if ((sp = LEFT(np)) != NULL) {
590 						unprotect(sp);
591 						PARENT(sp) = tp;
592 						protect(sp);
593 					}
594 					LEFT(tp) = sp;
595 
596 					if ((sp = RIGHT(np)) != NULL) {
597 						unprotect(sp);
598 						PARENT(sp) = tp;
599 						protect(sp);
600 					}
601 					RIGHT(tp) = sp;
602 
603 					/* doubly link list */
604 					LINKFOR(tp) = np;
605 					LINKBAK(np) = tp;
606 					SETNOTREE(np);
607 					protect(np);
608 
609 					break;
610 				}
611 			}
612 		} else {
613 			Root = tp;
614 		}
615 	}
616 
617 	/*
618 	 * Tell next block that this one is free.
619 	 * The first WORD of the next block contains self's address.
620 	 */
621 	/* LINTED improper alignment */
622 	tmp = NEXT(tp);
623 	unprotect(tmp);
624 	/* LINTED improper alignment */
625 	*(SELFP(tp)) = tp;
626 	SETBIT1(SIZE(tmp));
627 	ASSERT(ISBIT0(SIZE(tmp)));
628 	protect(tmp);
629 	protect(tp);
630 }
631 
632 /*
633  * Get more core. Gaps in memory are noted as busy blocks.
634  */
635 static TREE *
636 morecore(size_t size)
637 {
638 	TREE	*tp;
639 	size_t	n, offset, requestsize;
640 	char	*addr;
641 
642 	/* compute new amount of memory to get */
643 	tp = Bottom;
644 	n = size + 2 * WORDSIZE;
645 	addr = GETCORE(0);
646 
647 	if (addr == ERRCORE)
648 		/* errno set by GETCORE sbrk */
649 		return (NULL);
650 
651 	/* need to pad size out so that addr is aligned */
652 	if ((((size_t)addr) % ALIGN) != 0)
653 		offset = ALIGN - (size_t)addr % ALIGN;
654 	else
655 		offset = 0;
656 
657 	if (tp)
658 		unprotect(tp);
659 
660 	/* if not segmented memory, what we need may be smaller */
661 	if (addr == Baddr) {
662 		n -= WORDSIZE;
663 		if (tp != NULL)
664 			n -= SIZE(tp);
665 	}
666 
667 	/* get a multiple of CORESIZE */
668 	n = ((n - 1) / CORESIZE + 1) * CORESIZE;
669 	requestsize = n + offset;
670 
671 	/* check if nsize request could overflow in GETCORE */
672 	if (requestsize > MAX_MALLOC - (size_t)addr) {
673 		if (tp)
674 			protect(tp);
675 		errno = ENOMEM;
676 		return (NULL);
677 	}
678 
679 	if (requestsize > MAX_GETCORE) {
680 		intptr_t	delta;
681 		/*
682 		 * the value required is too big for GETCORE() to deal with
683 		 * in one go, so use GETCORE() at most 2 times instead.
684 		 * Argument to GETCORE() must be multiple of ALIGN.
685 		 * If not, GETCORE(-MAX_GETCORE) will not return brk point
686 		 * to previous value, but will be ALIGN more.
687 		 * This would leave a small hole.
688 		 */
689 		delta = MAX_GETCORE;
690 		while (delta > 0) {
691 			if (GETCORE(delta) == ERRCORE) {
692 				if (tp)
693 					protect(tp);
694 				if (addr != GETCORE(0))
695 					(void) GETCORE(-MAX_GETCORE);
696 				return (NULL);
697 			}
698 			requestsize -= MAX_GETCORE;
699 			delta = requestsize;
700 		}
701 	} else if (GETCORE(requestsize) == ERRCORE) {
702 		if (tp)
703 			protect(tp);
704 		return (NULL);
705 	}
706 
707 	/* contiguous memory */
708 	if (addr == Baddr) {
709 		ASSERT(offset == 0);
710 		if (tp) {
711 			addr = ((char *)tp);
712 			n += SIZE(tp) + 2 * WORDSIZE;
713 		} else {
714 			addr = Baddr - WORDSIZE;
715 			n += WORDSIZE;
716 		}
717 	} else {
718 		addr += offset;
719 	}
720 
721 	/* new bottom address */
722 	Baddr = addr + n;
723 
724 	/* new bottom block */
725 	/* LINTED improper alignment */
726 	tp = ((TREE *)addr);
727 	SIZE(tp) = n - 2 * WORDSIZE;
728 	ASSERT((SIZE(tp) % ALIGN) == 0);
729 
730 	/* reserved the last word to head any noncontiguous memory */
731 	/* LINTED improper alignment */
732 	SETBIT0(SIZE(NEXT(tp)));
733 
734 	/* non-contiguous memory, free old bottom block */
735 	if (Bottom && Bottom != tp) {
736 		SETBIT0(SIZE(Bottom));
737 		realfree(DATA(Bottom));
738 	}
739 
740 	return (tp);
741 }
742 
743 /*
744  * Utility function to avoid protecting a tree node twice.
745  * Return true if tp is in the NULL-terminated array of tree nodes.
746  */
747 static int
748 in_list(TREE *tp, TREE **npp)
749 {
750 	TREE *sp;
751 
752 	while ((sp = *npp++) != NULL)
753 		if (tp == sp)
754 			return (1);
755 	return (0);
756 }
757 
758 /*
759  * Tree rotation functions (BU: bottom-up, TD: top-down).
760  * All functions are entered with the arguments unprotected.
761  * They must return in the same condition, with all other elements
762  * that have been unprotected during the operation re-protected.
763  */
764 static void
765 LEFT1(TREE *x, TREE *y)
766 {
767 	TREE *node[3];
768 	TREE **npp = node;
769 	TREE *tp;
770 
771 	if ((RIGHT(x) = LEFT(y)) != NULL) {
772 		unprotect(*npp++ = RIGHT(x));
773 		PARENT(RIGHT(x)) = x;
774 	}
775 	if ((PARENT(y) = PARENT(x)) != NULL) {
776 		unprotect(*npp++ = PARENT(x));
777 		if (LEFT(PARENT(x)) == x)
778 			LEFT(PARENT(y)) = y;
779 		else
780 			RIGHT(PARENT(y)) = y;
781 	}
782 	LEFT(y) = x;
783 	PARENT(x) = y;
784 
785 	*npp = NULL;
786 	npp = node;
787 	while ((tp = *npp++) != NULL)
788 		if (tp != x && tp != y && !in_list(tp, npp))
789 			protect(tp);
790 }
791 
792 static void
793 RIGHT1(TREE *x, TREE *y)
794 {
795 	TREE *node[3];
796 	TREE **npp = node;
797 	TREE *tp;
798 
799 	if ((LEFT(x) = RIGHT(y)) != NULL) {
800 		unprotect(*npp++ = LEFT(x));
801 		PARENT(LEFT(x)) = x;
802 	}
803 	if ((PARENT(y) = PARENT(x)) != NULL) {
804 		unprotect(*npp++ = PARENT(x));
805 		if (LEFT(PARENT(x)) == x)
806 			LEFT(PARENT(y)) = y;
807 		else
808 			RIGHT(PARENT(y)) = y;
809 	}
810 	RIGHT(y) = x;
811 	PARENT(x) = y;
812 
813 	*npp = NULL;
814 	npp = node;
815 	while ((tp = *npp++) != NULL)
816 		if (tp != x && tp != y && !in_list(tp, npp))
817 			protect(tp);
818 }
819 
820 static void
821 BULEFT2(TREE *x, TREE *y, TREE *z)
822 {
823 	TREE *node[4];
824 	TREE **npp = node;
825 	TREE *tp;
826 
827 	if ((RIGHT(x) = LEFT(y)) != NULL) {
828 		unprotect(*npp++ = RIGHT(x));
829 		PARENT(RIGHT(x)) = x;
830 	}
831 	if ((RIGHT(y) = LEFT(z)) != NULL) {
832 		unprotect(*npp++ = RIGHT(y));
833 		PARENT(RIGHT(y)) = y;
834 	}
835 	if ((PARENT(z) = PARENT(x)) != NULL) {
836 		unprotect(*npp++ = PARENT(x));
837 		if (LEFT(PARENT(x)) == x)
838 			LEFT(PARENT(z)) = z;
839 		else
840 			RIGHT(PARENT(z)) = z;
841 	}
842 	LEFT(z) = y;
843 	PARENT(y) = z;
844 	LEFT(y) = x;
845 	PARENT(x) = y;
846 
847 	*npp = NULL;
848 	npp = node;
849 	while ((tp = *npp++) != NULL)
850 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
851 			protect(tp);
852 }
853 
854 static void
855 BURIGHT2(TREE *x, TREE *y, TREE *z)
856 {
857 	TREE *node[4];
858 	TREE **npp = node;
859 	TREE *tp;
860 
861 	if ((LEFT(x) = RIGHT(y)) != NULL) {
862 		unprotect(*npp++ = LEFT(x));
863 		PARENT(LEFT(x)) = x;
864 	}
865 	if ((LEFT(y) = RIGHT(z)) != NULL) {
866 		unprotect(*npp++ = LEFT(y));
867 		PARENT(LEFT(y)) = y;
868 	}
869 	if ((PARENT(z) = PARENT(x)) != NULL) {
870 		unprotect(*npp++ = PARENT(x));
871 		if (LEFT(PARENT(x)) == x)
872 			LEFT(PARENT(z)) = z;
873 		else
874 			RIGHT(PARENT(z)) = z;
875 	}
876 	RIGHT(z) = y;
877 	PARENT(y) = z;
878 	RIGHT(y) = x;
879 	PARENT(x) = y;
880 
881 	*npp = NULL;
882 	npp = node;
883 	while ((tp = *npp++) != NULL)
884 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
885 			protect(tp);
886 }
887 
888 static void
889 TDLEFT2(TREE *x, TREE *y, TREE *z)
890 {
891 	TREE *node[3];
892 	TREE **npp = node;
893 	TREE *tp;
894 
895 	if ((RIGHT(y) = LEFT(z)) != NULL) {
896 		unprotect(*npp++ = RIGHT(y));
897 		PARENT(RIGHT(y)) = y;
898 	}
899 	if ((PARENT(z) = PARENT(x)) != NULL) {
900 		unprotect(*npp++ = PARENT(x));
901 		if (LEFT(PARENT(x)) == x)
902 			LEFT(PARENT(z)) = z;
903 		else
904 			RIGHT(PARENT(z)) = z;
905 	}
906 	PARENT(x) = z;
907 	LEFT(z) = x;
908 
909 	*npp = NULL;
910 	npp = node;
911 	while ((tp = *npp++) != NULL)
912 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
913 			protect(tp);
914 }
915 
916 #if 0	/* Not used, for now */
917 static void
918 TDRIGHT2(TREE *x, TREE *y, TREE *z)
919 {
920 	TREE *node[3];
921 	TREE **npp = node;
922 	TREE *tp;
923 
924 	if ((LEFT(y) = RIGHT(z)) != NULL) {
925 		unprotect(*npp++ = LEFT(y));
926 		PARENT(LEFT(y)) = y;
927 	}
928 	if ((PARENT(z) = PARENT(x)) != NULL) {
929 		unprotect(*npp++ = PARENT(x));
930 		if (LEFT(PARENT(x)) == x)
931 			LEFT(PARENT(z)) = z;
932 		else
933 			RIGHT(PARENT(z)) = z;
934 	}
935 	PARENT(x) = z;
936 	RIGHT(z) = x;
937 
938 	*npp = NULL;
939 	npp = node;
940 	while ((tp = *npp++) != NULL)
941 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
942 			protect(tp);
943 }
944 #endif
945 
946 /*
947  *	Delete a tree element
948  */
949 static void
950 t_delete(TREE *op)
951 {
952 	TREE *tp, *sp, *gp;
953 
954 	/* if this is a non-tree node */
955 	if (ISNOTREE(op)) {
956 		tp = LINKBAK(op);
957 		unprotect(tp);
958 		if ((sp = LINKFOR(op)) != NULL) {
959 			unprotect(sp);
960 			LINKBAK(sp) = tp;
961 			protect(sp);
962 		}
963 		LINKFOR(tp) = sp;
964 		protect(tp);
965 		return;
966 	}
967 
968 	/* make op the root of the tree */
969 	if (PARENT(op))
970 		t_splay(op);
971 
972 	/* if this is the start of a list */
973 	if ((tp = LINKFOR(op)) != NULL) {
974 		unprotect(tp);
975 		PARENT(tp) = NULL;
976 		if ((sp = LEFT(op)) != NULL) {
977 			unprotect(sp);
978 			PARENT(sp) = tp;
979 			protect(sp);
980 		}
981 		LEFT(tp) = sp;
982 
983 		if ((sp = RIGHT(op)) != NULL) {
984 			unprotect(sp);
985 			PARENT(sp) = tp;
986 			protect(sp);
987 		}
988 		RIGHT(tp) = sp;
989 
990 		Root = tp;
991 		protect(tp);
992 		return;
993 	}
994 
995 	/* if op has a non-null left subtree */
996 	if ((tp = LEFT(op)) != NULL) {
997 		unprotect(tp);
998 		PARENT(tp) = NULL;
999 		if (RIGHT(op)) {
1000 			/* make the right-end of the left subtree its root */
1001 			while ((sp = RIGHT(tp)) != NULL) {
1002 				unprotect(sp);
1003 				if ((gp = RIGHT(sp)) != NULL) {
1004 					unprotect(gp);
1005 					TDLEFT2(tp, sp, gp);
1006 					protect(sp);
1007 					protect(tp);
1008 					tp = gp;
1009 				} else {
1010 					LEFT1(tp, sp);
1011 					protect(tp);
1012 					tp = sp;
1013 				}
1014 			}
1015 
1016 			/* hook the right subtree of op to the above elt */
1017 			RIGHT(tp) = sp = RIGHT(op);
1018 			unprotect(sp);
1019 			PARENT(sp) = tp;
1020 			protect(sp);
1021 		}
1022 		protect(tp);
1023 	} else if ((tp = RIGHT(op)) != NULL) {	/* no left subtree */
1024 		unprotect(tp);
1025 		PARENT(tp) = NULL;
1026 		protect(tp);
1027 	}
1028 
1029 	Root = tp;
1030 }
1031 
1032 /*
1033  *	Bottom up splaying (simple version).
1034  *	The basic idea is to roughly cut in half the
1035  *	path from Root to tp and make tp the new root.
1036  */
1037 static void
1038 t_splay(TREE *tp)
1039 {
1040 	TREE *pp, *gp;
1041 
1042 	/* iterate until tp is the root */
1043 	while ((pp = PARENT(tp)) != NULL) {
1044 		unprotect(pp);
1045 		/* grandparent of tp */
1046 		gp = PARENT(pp);
1047 		if (gp)
1048 			unprotect(gp);
1049 
1050 		/* x is a left child */
1051 		if (LEFT(pp) == tp) {
1052 			if (gp && LEFT(gp) == pp) {
1053 				BURIGHT2(gp, pp, tp);
1054 				protect(gp);
1055 			} else {
1056 				if (gp)
1057 					protect(gp);
1058 				RIGHT1(pp, tp);
1059 			}
1060 		} else {
1061 			ASSERT(RIGHT(pp) == tp);
1062 			if (gp && RIGHT(gp) == pp) {
1063 				BULEFT2(gp, pp, tp);
1064 				protect(gp);
1065 			} else {
1066 				if (gp)
1067 					protect(gp);
1068 				LEFT1(pp, tp);
1069 			}
1070 		}
1071 		protect(pp);
1072 		unprotect(tp);	/* just in case */
1073 	}
1074 }
1075 
1076 void
1077 free(void *old)
1078 {
1079 	(void) mutex_lock(&__watch_malloc_lock);
1080 	free_unlocked(old);
1081 	(void) mutex_unlock(&__watch_malloc_lock);
1082 }
1083 
1084 
1085 static void
1086 free_unlocked(void *old)
1087 {
1088 	if (old != NULL)
1089 		realfree(old);
1090 }
1091 
1092 
1093 /*
1094  * memalign(align,nbytes)
1095  *
1096  * Description:
1097  *	Returns a block of specified size on a specified alignment boundary.
1098  *
1099  * Algorithm:
1100  *	Malloc enough to ensure that a block can be aligned correctly.
1101  *	Find the alignment point and return the fragments
1102  *	before and after the block.
1103  *
1104  * Errors:
1105  *	Returns NULL and sets errno as follows:
1106  *	[EINVAL]
1107  *		if nbytes = 0,
1108  *		or if alignment is misaligned,
1109  *		or if the heap has been detectably corrupted.
1110  *	[ENOMEM]
1111  *		if the requested memory could not be allocated.
1112  */
1113 
1114 #define	misaligned(p)		((unsigned)(p) & 3)
1115 		/* 4-byte "word" alignment is considered ok in LP64 */
1116 #define	nextblk(p, size)	((TREE *)((char *)(p) + (size)))
1117 
1118 void *
1119 memalign(size_t align, size_t nbytes)
1120 {
1121 	size_t	reqsize;	/* Num of bytes to get from malloc() */
1122 	TREE	*p;		/* Ptr returned from malloc() */
1123 	TREE	*blk;		/* For addressing fragment blocks */
1124 	size_t	blksize;	/* Current (shrinking) block size */
1125 	TREE	*alignedp;	/* Ptr to properly aligned boundary */
1126 	TREE	*aligned_blk;	/* The block to be returned */
1127 	size_t	frag_size;	/* size of fragments fore and aft */
1128 	size_t	x;
1129 
1130 	/*
1131 	 * check for valid size and alignment parameters
1132 	 * MAX_ALIGN check prevents overflow in later calculation.
1133 	 */
1134 	if (nbytes == 0 || misaligned(align) || align == 0 ||
1135 	    align > MAX_ALIGN) {
1136 		errno = EINVAL;
1137 		return (NULL);
1138 	}
1139 
1140 	/*
1141 	 * Malloc enough memory to guarantee that the result can be
1142 	 * aligned correctly. The worst case is when malloc returns
1143 	 * a block so close to the next alignment boundary that a
1144 	 * fragment of minimum size cannot be created.  In order to
1145 	 * make sure we can handle this, we need to force the
1146 	 * alignment to be at least as large as the minimum frag size
1147 	 * (MINSIZE + WORDSIZE).
1148 	 */
1149 
1150 	/* check for size that could overflow ROUND calculation */
1151 	if (nbytes > MAX_MALLOC) {
1152 		errno = ENOMEM;
1153 		return (NULL);
1154 	}
1155 	ROUND(nbytes);
1156 	if (nbytes < MINSIZE)
1157 		nbytes = MINSIZE;
1158 	ROUND(align);
1159 	while (align < MINSIZE + WORDSIZE)
1160 		align <<= 1;
1161 	reqsize = nbytes + align + (MINSIZE + WORDSIZE);
1162 	/* check for overflow */
1163 	if (reqsize < nbytes) {
1164 		errno = ENOMEM;
1165 		return (NULL);
1166 	}
1167 	p = (TREE *) malloc(reqsize);
1168 	if (p == (TREE *) NULL) {
1169 		/* malloc sets errno */
1170 		return (NULL);
1171 	}
1172 	(void) mutex_lock(&__watch_malloc_lock);
1173 
1174 	/*
1175 	 * get size of the entire block (overhead and all)
1176 	 */
1177 	/* LINTED improper alignment */
1178 	blk = BLOCK(p);			/* back up to get length word */
1179 	unprotect(blk);
1180 	blksize = SIZE(blk);
1181 	CLRBITS01(blksize);
1182 
1183 	/*
1184 	 * locate the proper alignment boundary within the block.
1185 	 */
1186 	x = (size_t)p;
1187 	if (x % align != 0)
1188 		x += align - (x % align);
1189 	alignedp = (TREE *)x;
1190 	/* LINTED improper alignment */
1191 	aligned_blk = BLOCK(alignedp);
1192 
1193 	/*
1194 	 * Check out the space to the left of the alignment
1195 	 * boundary, and split off a fragment if necessary.
1196 	 */
1197 	frag_size = (size_t)aligned_blk - (size_t)blk;
1198 	if (frag_size != 0) {
1199 		/*
1200 		 * Create a fragment to the left of the aligned block.
1201 		 */
1202 		if (frag_size < MINSIZE + WORDSIZE) {
1203 			/*
1204 			 * Not enough space. So make the split
1205 			 * at the other end of the alignment unit.
1206 			 * We know this yields enough space, because
1207 			 * we forced align >= MINSIZE + WORDSIZE above.
1208 			 */
1209 			frag_size += align;
1210 			/* LINTED improper alignment */
1211 			aligned_blk = nextblk(aligned_blk, align);
1212 		}
1213 		blksize -= frag_size;
1214 		SIZE(aligned_blk) = blksize | BIT0;
1215 		frag_size -= WORDSIZE;
1216 		SIZE(blk) = frag_size | BIT0 | ISBIT1(SIZE(blk));
1217 		free_unlocked(DATA(blk));
1218 		/*
1219 		 * free_unlocked(DATA(blk)) has the side-effect of calling
1220 		 * protect() on the block following blk, that is, aligned_blk.
1221 		 * We recover from this by unprotect()ing it here.
1222 		 */
1223 		unprotect(aligned_blk);
1224 	}
1225 
1226 	/*
1227 	 * Is there a (sufficiently large) fragment to the
1228 	 * right of the aligned block?
1229 	 */
1230 	frag_size = blksize - nbytes;
1231 	if (frag_size >= MINSIZE + WORDSIZE) {
1232 		/*
1233 		 * split and free a fragment on the right
1234 		 */
1235 		blksize = SIZE(aligned_blk);
1236 		SIZE(aligned_blk) = nbytes;
1237 		/* LINTED improper alignment */
1238 		blk = NEXT(aligned_blk);
1239 		SETOLD01(SIZE(aligned_blk), blksize);
1240 		frag_size -= WORDSIZE;
1241 		SIZE(blk) = frag_size | BIT0;
1242 		free_unlocked(DATA(blk));
1243 	}
1244 	copy_pattern(LIVEPAT, aligned_blk);
1245 	protect(aligned_blk);
1246 	(void) mutex_unlock(&__watch_malloc_lock);
1247 	return (DATA(aligned_blk));
1248 }
1249 
1250 void *
1251 valloc(size_t size)
1252 {
1253 	static unsigned pagesize;
1254 	if (!pagesize)
1255 		pagesize = _sysconf(_SC_PAGESIZE);
1256 	return (memalign(pagesize, size));
1257 }
1258 
1259 void *
1260 calloc(size_t num, size_t size)
1261 {
1262 	void *mp;
1263 	size_t total;
1264 
1265 	total = num * size;
1266 
1267 	/* check for overflow */
1268 	if (num != 0 && total / num != size) {
1269 		errno = ENOMEM;
1270 		return (NULL);
1271 	}
1272 	if ((mp = malloc(total)) != NULL)
1273 		(void) memset(mp, 0, total);
1274 	return (mp);
1275 }
1276 
1277 /* ARGSUSED1 */
1278 void
1279 cfree(void *p, size_t num, size_t size)
1280 {
1281 	free(p);
1282 }
1283 
1284 typedef struct {
1285 	long cmd;
1286 	prwatch_t prwatch;
1287 } ctl_t;
1288 
1289 static pid_t my_pid = 0;	/* to check for whether we fork()d */
1290 static int dont_watch = 0;
1291 static int do_stop = 0;
1292 static int ctlfd = -1;
1293 struct stat ctlstatb;
1294 static int wflags = WA_WRITE;
1295 
1296 static void
1297 init_watch()
1298 {
1299 	char str[80];
1300 	char *s;
1301 
1302 	my_pid = getpid();
1303 
1304 	dont_watch = 1;
1305 
1306 	if ((s = getenv("MALLOC_DEBUG")) == NULL)
1307 		return;
1308 
1309 	s = strncpy(str, s, sizeof (str));
1310 	while (s != NULL) {
1311 		char *e = strchr(s, ',');
1312 		if (e)
1313 			*e++ = '\0';
1314 		if (strcmp(s, "STOP") == 0)
1315 			do_stop = 1;
1316 		else if (strcmp(s, "WATCH") == 0)
1317 			dont_watch = 0;
1318 		else if (strcmp(s, "RW") == 0) {
1319 			dont_watch = 0;
1320 			wflags = WA_READ|WA_WRITE;
1321 		}
1322 		s = e;
1323 	}
1324 
1325 	if (dont_watch)
1326 		return;
1327 
1328 	if ((ctlfd = open("/proc/self/ctl", O_WRONLY)) < 0 ||
1329 	    fstat(ctlfd, &ctlstatb) != 0) {
1330 		if (ctlfd >= 0)
1331 			(void) close(ctlfd);
1332 		ctlfd = -1;
1333 		dont_watch = 1;
1334 		return;
1335 	}
1336 	/* close-on-exec */
1337 	(void) fcntl(ctlfd, F_SETFD, 1);
1338 
1339 	if (do_stop) {
1340 		int pfd;
1341 		pstatus_t pstatus;
1342 		struct {
1343 			long cmd;
1344 			fltset_t fltset;
1345 		} ctl;
1346 
1347 		/*
1348 		 * Play together with some /proc controller
1349 		 * that has set other stop-on-fault flags.
1350 		 */
1351 		premptyset(&ctl.fltset);
1352 		if ((pfd = open("/proc/self/status", O_RDONLY)) >= 0) {
1353 			if (read(pfd, &pstatus, sizeof (pstatus))
1354 			    == sizeof (pstatus))
1355 				ctl.fltset = pstatus.pr_flttrace;
1356 			(void) close(pfd);
1357 		}
1358 		praddset(&ctl.fltset, FLTWATCH);
1359 		ctl.cmd = PCSFAULT;
1360 		(void) write(ctlfd, &ctl, sizeof (ctl));
1361 	}
1362 }
1363 
1364 static int
1365 nowatch()
1366 {
1367 	struct stat statb;
1368 
1369 	if (dont_watch)
1370 		return (1);
1371 	if (ctlfd < 0)	/* first time */
1372 		init_watch();
1373 	else if (fstat(ctlfd, &statb) != 0 ||
1374 	    statb.st_dev != ctlstatb.st_dev ||
1375 	    statb.st_ino != ctlstatb.st_ino) {
1376 		/*
1377 		 * Someone closed our file descriptor.
1378 		 * Just open another one.
1379 		 */
1380 		if ((ctlfd = open("/proc/self/ctl", O_WRONLY)) < 0 ||
1381 		    fstat(ctlfd, &ctlstatb) != 0) {
1382 			if (ctlfd >= 0)
1383 				(void) close(ctlfd);
1384 			ctlfd = -1;
1385 			dont_watch = 1;
1386 			return (1);
1387 		}
1388 		/* close-on-exec */
1389 		(void) fcntl(ctlfd, F_SETFD, 1);
1390 	}
1391 	if (my_pid != getpid()) {
1392 		/*
1393 		 * We fork()d since the last call to the allocator.
1394 		 * watchpoints are not inherited across fork().
1395 		 * XXX: how to recover from this ???
1396 		 */
1397 		dont_watch = 1;
1398 		(void) close(ctlfd);
1399 		ctlfd = -1;
1400 	}
1401 	return (dont_watch);
1402 }
1403 
1404 static void
1405 protect(TREE *tp)
1406 {
1407 	ctl_t ctl;
1408 	size_t size, sz;
1409 
1410 	if (nowatch())
1411 		return;
1412 	if (tp == NULL || DATA(tp) == Baddr)
1413 		return;
1414 
1415 	sz = size = SIZE(tp);
1416 	CLRBITS01(size);
1417 	if (size == 0)
1418 		return;
1419 	if (ISBIT0(sz))		/* block is busy, protect only the head */
1420 		size = 0;
1421 	ctl.cmd = PCWATCH;
1422 	ctl.prwatch.pr_vaddr = (uintptr_t)tp;
1423 	ctl.prwatch.pr_size = size + WORDSIZE;
1424 	ctl.prwatch.pr_wflags = wflags;
1425 	(void) write(ctlfd, &ctl, sizeof (ctl));
1426 }
1427 
1428 static void
1429 unprotect(TREE *tp)
1430 {
1431 	ctl_t ctl;
1432 
1433 	if (nowatch())
1434 		return;
1435 	if (tp == NULL || DATA(tp) == Baddr)
1436 		return;
1437 
1438 	ctl.cmd = PCWATCH;
1439 	ctl.prwatch.pr_vaddr = (uintptr_t)tp;
1440 	ctl.prwatch.pr_size = WORDSIZE;		/* size is arbitrary */
1441 	ctl.prwatch.pr_wflags = 0;		/* clear the watched area */
1442 	(void) write(ctlfd, &ctl, sizeof (ctl));
1443 }
1444 
1445 static void
1446 malloc_prepare()
1447 {
1448 	(void) mutex_lock(&__watch_malloc_lock);
1449 }
1450 
1451 static void
1452 malloc_release()
1453 {
1454 	(void) mutex_unlock(&__watch_malloc_lock);
1455 }
1456 
1457 #pragma init(malloc_init)
1458 static void
1459 malloc_init(void)
1460 {
1461 	(void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
1462 }
1463