xref: /titanic_41/usr/src/lib/watchmalloc/common/malloc.c (revision b72d5b75fd6f5bb08d29f65652d60058fc3a2608)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  *	Memory management: malloc(), realloc(), free(), memalign().
34  *
35  *	The following #-parameters may be redefined:
36  *	GETCORE: a function to get more core memory.
37  *		GETCORE(0) is assumed to return the next available
38  *		address. Default is 'sbrk'.
39  *	ERRCORE: the error code as returned by GETCORE.
40  *		Default is ((char *)(-1)).
41  *	CORESIZE: a desired unit (measured in bytes) to be used
42  *		with GETCORE. Default is (1024*ALIGN).
43  *
44  *	This algorithm is based on a best fit strategy with lists of
45  *	free elts maintained in a self-adjusting binary tree. Each list
46  *	contains all elts of the same size. The tree is ordered by size.
47  *	For results on self-adjusting trees, see the paper:
48  *		Self-Adjusting Binary Trees,
49  *		DD Sleator & RE Tarjan, JACM 1985.
50  *
51  *	The header of a block contains the size of the data part in bytes.
52  *	Since the size of a block is 0%4, the low two bits of the header
53  *	are free and used as follows:
54  *
55  *		BIT0:	1 for busy (block is in use), 0 for free.
56  *		BIT1:	if the block is busy, this bit is 1 if the
57  *			preceding block in contiguous memory is free.
58  *			Otherwise, it is always 0.
59  */
60 
61 #include "mallint.h"
62 
63 static	mutex_t	__watch_malloc_lock = DEFAULTMUTEX;
64 
65 static	TREE	*Root;		/* root of the free tree */
66 static	TREE	*Bottom;	/* the last free chunk in the arena */
67 static	char	*Baddr;		/* current high address of the arena */
68 
69 static	void	t_delete(TREE *);
70 static	void	t_splay(TREE *);
71 static	void	realfree(void *);
72 static	void	*malloc_unlocked(size_t);
73 static	void	free_unlocked(void *);
74 static	TREE	*morecore(size_t);
75 
76 static	void	protect(TREE *);
77 static	void	unprotect(TREE *);
78 
79 #define	FREEPAT	0
80 #define	LIVEPAT	1
81 
82 /*
83  * Patterns to be copied into freed blocks and allocated blocks.
84  * 0xfeedbeef and 0xfeedface are invalid pointer values in all programs.
85  */
86 static	uint64_t	patterns[2] = {
87 	0xdeadbeefdeadbeefULL,	/* pattern in a freed block */
88 	0xbaddcafebaddcafeULL	/* pattern in an allocated block */
89 };
90 
91 static void
92 copy_pattern(int pat, TREE *tp)
93 {
94 	uint64_t pattern = patterns[pat];
95 	size_t sz = SIZE(tp) / sizeof (uint64_t);
96 	/* LINTED improper alignment */
97 	uint64_t *datap = (uint64_t *)DATA(tp);
98 
99 	while (sz--)
100 		*datap++ = pattern;
101 }
102 
103 /*
104  * Keep lists of small blocks, LIFO order.
105  */
106 static	TREE	*List[MINSIZE/WORDSIZE-1];
107 static	TREE	*Last[MINSIZE/WORDSIZE-1];
108 
109 /* number of blocks to get at one time */
110 #define	NPS	(WORDSIZE*8)
111 
112 static void *
113 smalloc(size_t size)
114 {
115 	TREE	*tp;
116 	size_t	i;
117 
118 	ASSERT(size % WORDSIZE == 0);
119 	/* want to return a unique pointer on malloc(0) */
120 	if (size == 0)
121 		size = WORDSIZE;
122 
123 	/* list to use */
124 	i = size / WORDSIZE - 1;
125 
126 	if (List[i] == NULL) {
127 		TREE	*np;
128 		int	n;
129 		ASSERT((size + WORDSIZE) * NPS >= MINSIZE);
130 
131 		/* get NPS of these block types */
132 		if ((np = malloc_unlocked((size + WORDSIZE)*NPS)) == NULL)
133 			return (NULL);
134 
135 		/* make them into a link list */
136 		for (n = 0, List[i] = np; n < NPS; ++n) {
137 			tp = np;
138 			SIZE(tp) = size;
139 			copy_pattern(FREEPAT, tp);
140 			if (n == NPS - 1) {
141 				Last[i] = tp;
142 				np = NULL;
143 			} else {
144 				/* LINTED improper alignment */
145 				np = NEXT(tp);
146 			}
147 			AFTER(tp) = np;
148 			protect(tp);
149 		}
150 	}
151 
152 	/* allocate from the head of the queue */
153 	tp = List[i];
154 	unprotect(tp);
155 	if ((List[i] = AFTER(tp)) == NULL)
156 		Last[i] = NULL;
157 	copy_pattern(LIVEPAT, tp);
158 	SETBIT0(SIZE(tp));
159 	protect(tp);
160 	return (DATA(tp));
161 }
162 
163 void *
164 malloc(size_t size)
165 {
166 	void	*ret;
167 	(void) mutex_lock(&__watch_malloc_lock);
168 	ret = malloc_unlocked(size);
169 	(void) mutex_unlock(&__watch_malloc_lock);
170 	return (ret);
171 }
172 
173 static void *
174 malloc_unlocked(size_t size)
175 {
176 	size_t	n;
177 	TREE	*tp, *sp, *tmp;
178 
179 	COUNT(nmalloc);
180 	ASSERT(WORDSIZE == ALIGN);
181 
182 	/* check for size that could overflow calculations */
183 	if (size > MAX_MALLOC) {
184 		errno = ENOMEM;
185 		return (NULL);
186 	}
187 	/* make sure that size is 0 mod ALIGN */
188 	ROUND(size);
189 
190 	/* small blocks */
191 	if (size < MINSIZE)
192 		return (smalloc(size));
193 
194 	/* search for an elt of the right size */
195 	sp = NULL;
196 	n = 0;
197 	if (Root) {
198 		tp = Root;
199 		for (;;) {
200 			unprotect(tp);
201 			if (SIZE(tp) >= size) {	/* branch left */
202 				if (n == 0 || n >= SIZE(tp)) {
203 					sp = tp;
204 					n = SIZE(tp);
205 				}
206 				if ((tmp = LEFT(tp)) != NULL) {
207 					protect(tp);
208 					tp = tmp;
209 				} else {
210 					protect(tp);
211 					break;
212 				}
213 			} else {		/* branch right */
214 				if ((tmp = RIGHT(tp)) != NULL) {
215 					protect(tp);
216 					tp = tmp;
217 				} else {
218 					protect(tp);
219 					break;
220 				}
221 			}
222 		}
223 
224 		if (sp) {
225 			unprotect(sp);
226 			t_delete(sp);
227 		} else if (tp != Root) {
228 			/* make the searched-to element the root */
229 			unprotect(tp);
230 			t_splay(tp);
231 			protect(tp);
232 			Root = tp;
233 		}
234 	}
235 
236 	/* if found none fitted in the tree */
237 	if (sp == NULL) {
238 		if (Bottom) {
239 			unprotect(Bottom);
240 			if (size <= SIZE(Bottom)) {
241 				sp = Bottom;
242 				CLRBITS01(SIZE(sp));
243 			} else {
244 				protect(Bottom);
245 				if ((sp = morecore(size)) == NULL)
246 					return (NULL);
247 			}
248 		} else {
249 			if ((sp = morecore(size)) == NULL)
250 				return (NULL);
251 		}
252 	}
253 
254 	/* tell the forward neighbor that we're busy */
255 	/* LINTED improper alignment */
256 	tmp = NEXT(sp);
257 	unprotect(tmp);
258 	CLRBIT1(SIZE(tmp));
259 	ASSERT(ISBIT0(SIZE(tmp)));
260 	protect(tmp);
261 
262 leftover:
263 	/* if the leftover is enough for a new free piece */
264 	if ((n = (SIZE(sp) - size)) >= MINSIZE + WORDSIZE) {
265 		n -= WORDSIZE;
266 		SIZE(sp) = size;
267 		/* LINTED improper alignment */
268 		tp = NEXT(sp);
269 		SIZE(tp) = n | BIT0;
270 		realfree(DATA(tp));
271 	} else if (BOTTOM(sp))
272 		Bottom = NULL;
273 
274 	/* return the allocated space */
275 	copy_pattern(LIVEPAT, sp);
276 	SIZE(sp) |= BIT0;
277 	protect(sp);
278 	return (DATA(sp));
279 }
280 
281 /*
282  *	realloc().
283  *	If the block size is increasing, we try forward merging first.
284  *	This is not best-fit but it avoids some data recopying.
285  */
286 void *
287 realloc(void *old, size_t size)
288 {
289 	TREE	*tp, *np;
290 	size_t	ts;
291 	char	*new;
292 
293 	COUNT(nrealloc);
294 
295 	/* check for size that could overflow calculations */
296 	if (size > MAX_MALLOC) {
297 		errno = ENOMEM;
298 		return (NULL);
299 	}
300 
301 	/* pointer to the block */
302 	(void) mutex_lock(&__watch_malloc_lock);
303 	if (old == NULL) {
304 		new = malloc_unlocked(size);
305 		(void) mutex_unlock(&__watch_malloc_lock);
306 		return (new);
307 	}
308 
309 	/* make sure that size is 0 mod ALIGN */
310 	ROUND(size);
311 
312 	/* LINTED improper alignment */
313 	tp = BLOCK(old);
314 	unprotect(tp);
315 	ts = SIZE(tp);
316 
317 	/* if the block was freed, data has been destroyed. */
318 	if (!ISBIT0(ts)) {
319 		/* XXX; complain here! */
320 		protect(tp);
321 		(void) mutex_unlock(&__watch_malloc_lock);
322 		errno = EINVAL;
323 		return (NULL);
324 	}
325 
326 	CLRBITS01(SIZE(tp));
327 	if (size == SIZE(tp)) {	/* nothing to do */
328 		SIZE(tp) = ts;
329 		protect(tp);
330 		(void) mutex_unlock(&__watch_malloc_lock);
331 		return (old);
332 	}
333 
334 	/* special cases involving small blocks */
335 	if (size < MINSIZE || SIZE(tp) < MINSIZE) {
336 		if (size == 0) {
337 			SETOLD01(SIZE(tp), ts);
338 			free_unlocked(old);
339 			(void) mutex_unlock(&__watch_malloc_lock);
340 			return (NULL);
341 		}
342 		goto call_malloc;
343 	}
344 
345 	/* block is increasing in size, try merging the next block */
346 	if (size > SIZE(tp)) {
347 		/* LINTED improper alignment */
348 		np = NEXT(tp);
349 		unprotect(np);
350 		if (ISBIT0(SIZE(np)))
351 			protect(np);
352 		else {
353 			TREE *tmp;
354 			ASSERT(SIZE(np) >= MINSIZE);
355 			ASSERT(!ISBIT1(SIZE(np)));
356 			SIZE(tp) += SIZE(np) + WORDSIZE;
357 			if (np != Bottom)
358 				t_delete(np);
359 			else
360 				Bottom = NULL;
361 			/* LINTED improper alignment */
362 			tmp = NEXT(np);
363 			unprotect(tmp);
364 			CLRBIT1(SIZE(tmp));
365 			protect(tmp);
366 		}
367 
368 		/* not enough & at TRUE end of memory, try extending core */
369 		if (size > SIZE(tp) && BOTTOM(tp) && GETCORE(0) == Baddr) {
370 			Bottom = tp;
371 			protect(Bottom);
372 			if ((tp = morecore(size)) == NULL) {
373 				tp = Bottom;
374 				Bottom = NULL;
375 				unprotect(tp);
376 			}
377 		}
378 	}
379 
380 	/* got enough space to use */
381 	if (size <= SIZE(tp)) {
382 		size_t n;
383 chop_big:
384 		if ((n = (SIZE(tp) - size)) >= MINSIZE + WORDSIZE) {
385 			n -= WORDSIZE;
386 			SIZE(tp) = size;
387 			/* LINTED improper alignment */
388 			np = NEXT(tp);
389 			SIZE(np) = n | BIT0;
390 			realfree(DATA(np));
391 		} else if (BOTTOM(tp))
392 			Bottom = NULL;
393 
394 		/* the previous block may be free */
395 		SETOLD01(SIZE(tp), ts);
396 		protect(tp);
397 		(void) mutex_unlock(&__watch_malloc_lock);
398 		return (old);
399 	}
400 
401 call_malloc:	/* call malloc to get a new block */
402 	SETOLD01(SIZE(tp), ts);
403 	if ((new = malloc_unlocked(size)) != NULL) {
404 		CLRBITS01(ts);
405 		if (ts > size)
406 			ts = size;
407 		(void) memcpy(new, old, ts);
408 		free_unlocked(old);
409 		(void) mutex_unlock(&__watch_malloc_lock);
410 		return (new);
411 	}
412 
413 	/*
414 	 * Attempt special case recovery allocations since malloc() failed:
415 	 *
416 	 * 1. size <= SIZE(tp) < MINSIZE
417 	 *	Simply return the existing block
418 	 * 2. SIZE(tp) < size < MINSIZE
419 	 *	malloc() may have failed to allocate the chunk of
420 	 *	small blocks. Try asking for MINSIZE bytes.
421 	 * 3. size < MINSIZE <= SIZE(tp)
422 	 *	malloc() may have failed as with 2.  Change to
423 	 *	MINSIZE allocation which is taken from the beginning
424 	 *	of the current block.
425 	 * 4. MINSIZE <= SIZE(tp) < size
426 	 *	If the previous block is free and the combination of
427 	 *	these two blocks has at least size bytes, then merge
428 	 *	the two blocks copying the existing contents backwards.
429 	 */
430 	CLRBITS01(SIZE(tp));
431 	if (SIZE(tp) < MINSIZE) {
432 		if (size < SIZE(tp))		/* case 1. */ {
433 			SETOLD01(SIZE(tp), ts);
434 			protect(tp);
435 			(void) mutex_unlock(&__watch_malloc_lock);
436 			return (old);
437 		} else if (size < MINSIZE)	/* case 2. */ {
438 			size = MINSIZE;
439 			goto call_malloc;
440 		}
441 	} else if (size < MINSIZE)		/* case 3. */ {
442 		size = MINSIZE;
443 		goto chop_big;
444 	} else if (ISBIT1(ts)) {
445 		np = LAST(tp);
446 		unprotect(np);
447 		if ((SIZE(np) + SIZE(tp) + WORDSIZE) >= size) {
448 			ASSERT(!ISBIT0(SIZE(np)));
449 			t_delete(np);
450 			SIZE(np) += SIZE(tp) + WORDSIZE;
451 			/*
452 			 * Since the copy may overlap, use memmove().
453 			 */
454 			(void) memmove(DATA(np), old, SIZE(tp));
455 			old = DATA(np);
456 			tp = np;
457 			CLRBIT1(ts);
458 			goto chop_big;
459 		}
460 		protect(np);
461 	}
462 	SETOLD01(SIZE(tp), ts);
463 	protect(tp);
464 	(void) mutex_unlock(&__watch_malloc_lock);
465 	/* malloc() sets errno */
466 	return (NULL);
467 }
468 
469 /*
470  *	realfree().
471  *	Coalescing of adjacent free blocks is done first.
472  *	Then, the new free block is leaf-inserted into the free tree
473  *	without splaying. This strategy does not guarantee the amortized
474  *	O(nlogn) behaviour for the insert/delete/find set of operations
475  *	on the tree. In practice, however, free is much more infrequent
476  *	than malloc/realloc and the tree searches performed by these
477  *	functions adequately keep the tree in balance.
478  */
479 static void
480 realfree(void *old)
481 {
482 	TREE	*tp, *sp, *np, *tmp;
483 	size_t	ts, size;
484 
485 	COUNT(nfree);
486 
487 	/* pointer to the block */
488 	/* LINTED improper alignment */
489 	tp = BLOCK(old);
490 	unprotect(tp);
491 	ts = SIZE(tp);
492 	if (!ISBIT0(ts)) {	/* block is not busy; previously freed? */
493 		protect(tp);	/* force a watchpoint trap */
494 		CLRBIT0(SIZE(tp));
495 		return;
496 	}
497 	CLRBITS01(SIZE(tp));
498 	copy_pattern(FREEPAT, tp);
499 
500 	/* small block, return it to the tail of its queue */
501 	if (SIZE(tp) < MINSIZE) {
502 		ASSERT(SIZE(tp) / WORDSIZE >= 1);
503 		ts = SIZE(tp) / WORDSIZE - 1;
504 		AFTER(tp) = NULL;
505 		protect(tp);
506 		if (List[ts] == NULL) {
507 			List[ts] = tp;
508 			Last[ts] = tp;
509 		} else {
510 			sp = Last[ts];
511 			unprotect(sp);
512 			AFTER(sp) = tp;
513 			protect(sp);
514 			Last[ts] = tp;
515 		}
516 		return;
517 	}
518 
519 	/* see if coalescing with next block is warranted */
520 	/* LINTED improper alignment */
521 	np = NEXT(tp);
522 	unprotect(np);
523 	if (ISBIT0(SIZE(np)))
524 		protect(np);
525 	else {
526 		if (np != Bottom)
527 			t_delete(np);
528 		SIZE(tp) += SIZE(np) + WORDSIZE;
529 	}
530 
531 	/* the same with the preceding block */
532 	if (ISBIT1(ts)) {
533 		np = LAST(tp);
534 		unprotect(np);
535 		ASSERT(!ISBIT0(SIZE(np)));
536 		ASSERT(np != Bottom);
537 		t_delete(np);
538 		SIZE(np) += SIZE(tp) + WORDSIZE;
539 		tp = np;
540 	}
541 
542 	/* initialize tree info */
543 	PARENT(tp) = LEFT(tp) = RIGHT(tp) = LINKFOR(tp) = NULL;
544 
545 	/* set bottom block, or insert in the free tree */
546 	if (BOTTOM(tp))
547 		Bottom = tp;
548 	else {
549 		/* search for the place to insert */
550 		if (Root) {
551 			size = SIZE(tp);
552 			np = Root;
553 			for (;;) {
554 				unprotect(np);
555 				if (SIZE(np) > size) {
556 					if ((tmp = LEFT(np)) != NULL) {
557 						protect(np);
558 						np = tmp;
559 					} else {
560 						LEFT(np) = tp;
561 						PARENT(tp) = np;
562 						protect(np);
563 						break;
564 					}
565 				} else if (SIZE(np) < size) {
566 					if ((tmp = RIGHT(np)) != NULL) {
567 						protect(np);
568 						np = tmp;
569 					} else {
570 						RIGHT(np) = tp;
571 						PARENT(tp) = np;
572 						protect(np);
573 						break;
574 					}
575 				} else {
576 					if ((sp = PARENT(np)) != NULL) {
577 						unprotect(sp);
578 						if (np == LEFT(sp))
579 							LEFT(sp) = tp;
580 						else
581 							RIGHT(sp) = tp;
582 						PARENT(tp) = sp;
583 						protect(sp);
584 					} else
585 						Root = tp;
586 
587 					/* insert to head of list */
588 					if ((sp = LEFT(np)) != NULL) {
589 						unprotect(sp);
590 						PARENT(sp) = tp;
591 						protect(sp);
592 					}
593 					LEFT(tp) = sp;
594 
595 					if ((sp = RIGHT(np)) != NULL) {
596 						unprotect(sp);
597 						PARENT(sp) = tp;
598 						protect(sp);
599 					}
600 					RIGHT(tp) = sp;
601 
602 					/* doubly link list */
603 					LINKFOR(tp) = np;
604 					LINKBAK(np) = tp;
605 					SETNOTREE(np);
606 					protect(np);
607 
608 					break;
609 				}
610 			}
611 		} else {
612 			Root = tp;
613 		}
614 	}
615 
616 	/*
617 	 * Tell next block that this one is free.
618 	 * The first WORD of the next block contains self's address.
619 	 */
620 	/* LINTED improper alignment */
621 	tmp = NEXT(tp);
622 	unprotect(tmp);
623 	/* LINTED improper alignment */
624 	*(SELFP(tp)) = tp;
625 	SETBIT1(SIZE(tmp));
626 	ASSERT(ISBIT0(SIZE(tmp)));
627 	protect(tmp);
628 	protect(tp);
629 }
630 
631 /*
632  * Get more core. Gaps in memory are noted as busy blocks.
633  */
634 static TREE *
635 morecore(size_t size)
636 {
637 	TREE	*tp;
638 	size_t	n, offset, requestsize;
639 	char	*addr;
640 
641 	/* compute new amount of memory to get */
642 	tp = Bottom;
643 	n = size + 2 * WORDSIZE;
644 	addr = GETCORE(0);
645 
646 	if (addr == ERRCORE)
647 		/* errno set by GETCORE sbrk */
648 		return (NULL);
649 
650 	/* need to pad size out so that addr is aligned */
651 	if ((((size_t)addr) % ALIGN) != 0)
652 		offset = ALIGN - (size_t)addr % ALIGN;
653 	else
654 		offset = 0;
655 
656 	if (tp)
657 		unprotect(tp);
658 
659 	/* if not segmented memory, what we need may be smaller */
660 	if (addr == Baddr) {
661 		n -= WORDSIZE;
662 		if (tp != NULL)
663 			n -= SIZE(tp);
664 	}
665 
666 	/* get a multiple of CORESIZE */
667 	n = ((n - 1) / CORESIZE + 1) * CORESIZE;
668 	requestsize = n + offset;
669 
670 	/* check if nsize request could overflow in GETCORE */
671 	if (requestsize > MAX_MALLOC - (size_t)addr) {
672 		if (tp)
673 			protect(tp);
674 		errno = ENOMEM;
675 		return (NULL);
676 	}
677 
678 	if (requestsize > MAX_GETCORE) {
679 		intptr_t	delta;
680 		/*
681 		 * the value required is too big for GETCORE() to deal with
682 		 * in one go, so use GETCORE() at most 2 times instead.
683 		 * Argument to GETCORE() must be multiple of ALIGN.
684 		 * If not, GETCORE(-MAX_GETCORE) will not return brk point
685 		 * to previous value, but will be ALIGN more.
686 		 * This would leave a small hole.
687 		 */
688 		delta = MAX_GETCORE;
689 		while (delta > 0) {
690 			if (GETCORE(delta) == ERRCORE) {
691 				if (tp)
692 					protect(tp);
693 				if (addr != GETCORE(0))
694 					(void) GETCORE(-MAX_GETCORE);
695 				return (NULL);
696 			}
697 			requestsize -= MAX_GETCORE;
698 			delta = requestsize;
699 		}
700 	} else if (GETCORE(requestsize) == ERRCORE) {
701 		if (tp)
702 			protect(tp);
703 		return (NULL);
704 	}
705 
706 	/* contiguous memory */
707 	if (addr == Baddr) {
708 		ASSERT(offset == 0);
709 		if (tp) {
710 			addr = ((char *)tp);
711 			n += SIZE(tp) + 2 * WORDSIZE;
712 		} else {
713 			addr = Baddr - WORDSIZE;
714 			n += WORDSIZE;
715 		}
716 	} else {
717 		addr += offset;
718 	}
719 
720 	/* new bottom address */
721 	Baddr = addr + n;
722 
723 	/* new bottom block */
724 	/* LINTED improper alignment */
725 	tp = ((TREE *)addr);
726 	SIZE(tp) = n - 2 * WORDSIZE;
727 	ASSERT((SIZE(tp) % ALIGN) == 0);
728 
729 	/* reserved the last word to head any noncontiguous memory */
730 	/* LINTED improper alignment */
731 	SETBIT0(SIZE(NEXT(tp)));
732 
733 	/* non-contiguous memory, free old bottom block */
734 	if (Bottom && Bottom != tp) {
735 		SETBIT0(SIZE(Bottom));
736 		realfree(DATA(Bottom));
737 	}
738 
739 	return (tp);
740 }
741 
742 /*
743  * Utility function to avoid protecting a tree node twice.
744  * Return true if tp is in the NULL-terminated array of tree nodes.
745  */
746 static int
747 in_list(TREE *tp, TREE **npp)
748 {
749 	TREE *sp;
750 
751 	while ((sp = *npp++) != NULL)
752 		if (tp == sp)
753 			return (1);
754 	return (0);
755 }
756 
757 /*
758  * Tree rotation functions (BU: bottom-up, TD: top-down).
759  * All functions are entered with the arguments unprotected.
760  * They must return in the same condition, with all other elements
761  * that have been unprotected during the operation re-protected.
762  */
763 static void
764 LEFT1(TREE *x, TREE *y)
765 {
766 	TREE *node[3];
767 	TREE **npp = node;
768 	TREE *tp;
769 
770 	if ((RIGHT(x) = LEFT(y)) != NULL) {
771 		unprotect(*npp++ = RIGHT(x));
772 		PARENT(RIGHT(x)) = x;
773 	}
774 	if ((PARENT(y) = PARENT(x)) != NULL) {
775 		unprotect(*npp++ = PARENT(x));
776 		if (LEFT(PARENT(x)) == x)
777 			LEFT(PARENT(y)) = y;
778 		else
779 			RIGHT(PARENT(y)) = y;
780 	}
781 	LEFT(y) = x;
782 	PARENT(x) = y;
783 
784 	*npp = NULL;
785 	npp = node;
786 	while ((tp = *npp++) != NULL)
787 		if (tp != x && tp != y && !in_list(tp, npp))
788 			protect(tp);
789 }
790 
791 static void
792 RIGHT1(TREE *x, TREE *y)
793 {
794 	TREE *node[3];
795 	TREE **npp = node;
796 	TREE *tp;
797 
798 	if ((LEFT(x) = RIGHT(y)) != NULL) {
799 		unprotect(*npp++ = LEFT(x));
800 		PARENT(LEFT(x)) = x;
801 	}
802 	if ((PARENT(y) = PARENT(x)) != NULL) {
803 		unprotect(*npp++ = PARENT(x));
804 		if (LEFT(PARENT(x)) == x)
805 			LEFT(PARENT(y)) = y;
806 		else
807 			RIGHT(PARENT(y)) = y;
808 	}
809 	RIGHT(y) = x;
810 	PARENT(x) = y;
811 
812 	*npp = NULL;
813 	npp = node;
814 	while ((tp = *npp++) != NULL)
815 		if (tp != x && tp != y && !in_list(tp, npp))
816 			protect(tp);
817 }
818 
819 static void
820 BULEFT2(TREE *x, TREE *y, TREE *z)
821 {
822 	TREE *node[4];
823 	TREE **npp = node;
824 	TREE *tp;
825 
826 	if ((RIGHT(x) = LEFT(y)) != NULL) {
827 		unprotect(*npp++ = RIGHT(x));
828 		PARENT(RIGHT(x)) = x;
829 	}
830 	if ((RIGHT(y) = LEFT(z)) != NULL) {
831 		unprotect(*npp++ = RIGHT(y));
832 		PARENT(RIGHT(y)) = y;
833 	}
834 	if ((PARENT(z) = PARENT(x)) != NULL) {
835 		unprotect(*npp++ = PARENT(x));
836 		if (LEFT(PARENT(x)) == x)
837 			LEFT(PARENT(z)) = z;
838 		else
839 			RIGHT(PARENT(z)) = z;
840 	}
841 	LEFT(z) = y;
842 	PARENT(y) = z;
843 	LEFT(y) = x;
844 	PARENT(x) = y;
845 
846 	*npp = NULL;
847 	npp = node;
848 	while ((tp = *npp++) != NULL)
849 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
850 			protect(tp);
851 }
852 
853 static void
854 BURIGHT2(TREE *x, TREE *y, TREE *z)
855 {
856 	TREE *node[4];
857 	TREE **npp = node;
858 	TREE *tp;
859 
860 	if ((LEFT(x) = RIGHT(y)) != NULL) {
861 		unprotect(*npp++ = LEFT(x));
862 		PARENT(LEFT(x)) = x;
863 	}
864 	if ((LEFT(y) = RIGHT(z)) != NULL) {
865 		unprotect(*npp++ = LEFT(y));
866 		PARENT(LEFT(y)) = y;
867 	}
868 	if ((PARENT(z) = PARENT(x)) != NULL) {
869 		unprotect(*npp++ = PARENT(x));
870 		if (LEFT(PARENT(x)) == x)
871 			LEFT(PARENT(z)) = z;
872 		else
873 			RIGHT(PARENT(z)) = z;
874 	}
875 	RIGHT(z) = y;
876 	PARENT(y) = z;
877 	RIGHT(y) = x;
878 	PARENT(x) = y;
879 
880 	*npp = NULL;
881 	npp = node;
882 	while ((tp = *npp++) != NULL)
883 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
884 			protect(tp);
885 }
886 
887 static void
888 TDLEFT2(TREE *x, TREE *y, TREE *z)
889 {
890 	TREE *node[3];
891 	TREE **npp = node;
892 	TREE *tp;
893 
894 	if ((RIGHT(y) = LEFT(z)) != NULL) {
895 		unprotect(*npp++ = RIGHT(y));
896 		PARENT(RIGHT(y)) = y;
897 	}
898 	if ((PARENT(z) = PARENT(x)) != NULL) {
899 		unprotect(*npp++ = PARENT(x));
900 		if (LEFT(PARENT(x)) == x)
901 			LEFT(PARENT(z)) = z;
902 		else
903 			RIGHT(PARENT(z)) = z;
904 	}
905 	PARENT(x) = z;
906 	LEFT(z) = x;
907 
908 	*npp = NULL;
909 	npp = node;
910 	while ((tp = *npp++) != NULL)
911 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
912 			protect(tp);
913 }
914 
915 #if 0	/* Not used, for now */
916 static void
917 TDRIGHT2(TREE *x, TREE *y, TREE *z)
918 {
919 	TREE *node[3];
920 	TREE **npp = node;
921 	TREE *tp;
922 
923 	if ((LEFT(y) = RIGHT(z)) != NULL) {
924 		unprotect(*npp++ = LEFT(y));
925 		PARENT(LEFT(y)) = y;
926 	}
927 	if ((PARENT(z) = PARENT(x)) != NULL) {
928 		unprotect(*npp++ = PARENT(x));
929 		if (LEFT(PARENT(x)) == x)
930 			LEFT(PARENT(z)) = z;
931 		else
932 			RIGHT(PARENT(z)) = z;
933 	}
934 	PARENT(x) = z;
935 	RIGHT(z) = x;
936 
937 	*npp = NULL;
938 	npp = node;
939 	while ((tp = *npp++) != NULL)
940 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
941 			protect(tp);
942 }
943 #endif
944 
945 /*
946  *	Delete a tree element
947  */
948 static void
949 t_delete(TREE *op)
950 {
951 	TREE *tp, *sp, *gp;
952 
953 	/* if this is a non-tree node */
954 	if (ISNOTREE(op)) {
955 		tp = LINKBAK(op);
956 		unprotect(tp);
957 		if ((sp = LINKFOR(op)) != NULL) {
958 			unprotect(sp);
959 			LINKBAK(sp) = tp;
960 			protect(sp);
961 		}
962 		LINKFOR(tp) = sp;
963 		protect(tp);
964 		return;
965 	}
966 
967 	/* make op the root of the tree */
968 	if (PARENT(op))
969 		t_splay(op);
970 
971 	/* if this is the start of a list */
972 	if ((tp = LINKFOR(op)) != NULL) {
973 		unprotect(tp);
974 		PARENT(tp) = NULL;
975 		if ((sp = LEFT(op)) != NULL) {
976 			unprotect(sp);
977 			PARENT(sp) = tp;
978 			protect(sp);
979 		}
980 		LEFT(tp) = sp;
981 
982 		if ((sp = RIGHT(op)) != NULL) {
983 			unprotect(sp);
984 			PARENT(sp) = tp;
985 			protect(sp);
986 		}
987 		RIGHT(tp) = sp;
988 
989 		Root = tp;
990 		protect(tp);
991 		return;
992 	}
993 
994 	/* if op has a non-null left subtree */
995 	if ((tp = LEFT(op)) != NULL) {
996 		unprotect(tp);
997 		PARENT(tp) = NULL;
998 		if (RIGHT(op)) {
999 			/* make the right-end of the left subtree its root */
1000 			while ((sp = RIGHT(tp)) != NULL) {
1001 				unprotect(sp);
1002 				if ((gp = RIGHT(sp)) != NULL) {
1003 					unprotect(gp);
1004 					TDLEFT2(tp, sp, gp);
1005 					protect(sp);
1006 					protect(tp);
1007 					tp = gp;
1008 				} else {
1009 					LEFT1(tp, sp);
1010 					protect(tp);
1011 					tp = sp;
1012 				}
1013 			}
1014 
1015 			/* hook the right subtree of op to the above elt */
1016 			RIGHT(tp) = sp = RIGHT(op);
1017 			unprotect(sp);
1018 			PARENT(sp) = tp;
1019 			protect(sp);
1020 		}
1021 		protect(tp);
1022 	} else if ((tp = RIGHT(op)) != NULL) {	/* no left subtree */
1023 		unprotect(tp);
1024 		PARENT(tp) = NULL;
1025 		protect(tp);
1026 	}
1027 
1028 	Root = tp;
1029 }
1030 
1031 /*
1032  *	Bottom up splaying (simple version).
1033  *	The basic idea is to roughly cut in half the
1034  *	path from Root to tp and make tp the new root.
1035  */
1036 static void
1037 t_splay(TREE *tp)
1038 {
1039 	TREE *pp, *gp;
1040 
1041 	/* iterate until tp is the root */
1042 	while ((pp = PARENT(tp)) != NULL) {
1043 		unprotect(pp);
1044 		/* grandparent of tp */
1045 		gp = PARENT(pp);
1046 		if (gp)
1047 			unprotect(gp);
1048 
1049 		/* x is a left child */
1050 		if (LEFT(pp) == tp) {
1051 			if (gp && LEFT(gp) == pp) {
1052 				BURIGHT2(gp, pp, tp);
1053 				protect(gp);
1054 			} else {
1055 				if (gp)
1056 					protect(gp);
1057 				RIGHT1(pp, tp);
1058 			}
1059 		} else {
1060 			ASSERT(RIGHT(pp) == tp);
1061 			if (gp && RIGHT(gp) == pp) {
1062 				BULEFT2(gp, pp, tp);
1063 				protect(gp);
1064 			} else {
1065 				if (gp)
1066 					protect(gp);
1067 				LEFT1(pp, tp);
1068 			}
1069 		}
1070 		protect(pp);
1071 		unprotect(tp);	/* just in case */
1072 	}
1073 }
1074 
1075 void
1076 free(void *old)
1077 {
1078 	(void) mutex_lock(&__watch_malloc_lock);
1079 	free_unlocked(old);
1080 	(void) mutex_unlock(&__watch_malloc_lock);
1081 }
1082 
1083 
1084 static void
1085 free_unlocked(void *old)
1086 {
1087 	if (old != NULL)
1088 		realfree(old);
1089 }
1090 
1091 
1092 /*
1093  * memalign(align,nbytes)
1094  *
1095  * Description:
1096  *	Returns a block of specified size on a specified alignment boundary.
1097  *
1098  * Algorithm:
1099  *	Malloc enough to ensure that a block can be aligned correctly.
1100  *	Find the alignment point and return the fragments
1101  *	before and after the block.
1102  *
1103  * Errors:
1104  *	Returns NULL and sets errno as follows:
1105  *	[EINVAL]
1106  *		if nbytes = 0,
1107  *		or if alignment is misaligned,
1108  *		or if the heap has been detectably corrupted.
1109  *	[ENOMEM]
1110  *		if the requested memory could not be allocated.
1111  */
1112 
1113 #define	misaligned(p)		((unsigned)(p) & 3)
1114 		/* 4-byte "word" alignment is considered ok in LP64 */
1115 #define	nextblk(p, size)	((TREE *)((char *)(p) + (size)))
1116 
1117 void *
1118 memalign(size_t align, size_t nbytes)
1119 {
1120 	size_t	reqsize;	/* Num of bytes to get from malloc() */
1121 	TREE	*p;		/* Ptr returned from malloc() */
1122 	TREE	*blk;		/* For addressing fragment blocks */
1123 	size_t	blksize;	/* Current (shrinking) block size */
1124 	TREE	*alignedp;	/* Ptr to properly aligned boundary */
1125 	TREE	*aligned_blk;	/* The block to be returned */
1126 	size_t	frag_size;	/* size of fragments fore and aft */
1127 	size_t	x;
1128 
1129 	/*
1130 	 * check for valid size and alignment parameters
1131 	 * MAX_ALIGN check prevents overflow in later calculation.
1132 	 */
1133 	if (nbytes == 0 || misaligned(align) || align == 0 ||
1134 	    align > MAX_ALIGN) {
1135 		errno = EINVAL;
1136 		return (NULL);
1137 	}
1138 
1139 	/*
1140 	 * Malloc enough memory to guarantee that the result can be
1141 	 * aligned correctly. The worst case is when malloc returns
1142 	 * a block so close to the next alignment boundary that a
1143 	 * fragment of minimum size cannot be created.  In order to
1144 	 * make sure we can handle this, we need to force the
1145 	 * alignment to be at least as large as the minimum frag size
1146 	 * (MINSIZE + WORDSIZE).
1147 	 */
1148 
1149 	/* check for size that could overflow ROUND calculation */
1150 	if (nbytes > MAX_MALLOC) {
1151 		errno = ENOMEM;
1152 		return (NULL);
1153 	}
1154 	ROUND(nbytes);
1155 	if (nbytes < MINSIZE)
1156 		nbytes = MINSIZE;
1157 	ROUND(align);
1158 	while (align < MINSIZE + WORDSIZE)
1159 		align <<= 1;
1160 	reqsize = nbytes + align + (MINSIZE + WORDSIZE);
1161 	/* check for overflow */
1162 	if (reqsize < nbytes) {
1163 		errno = ENOMEM;
1164 		return (NULL);
1165 	}
1166 	p = (TREE *) malloc(reqsize);
1167 	if (p == (TREE *) NULL) {
1168 		/* malloc sets errno */
1169 		return (NULL);
1170 	}
1171 	(void) mutex_lock(&__watch_malloc_lock);
1172 
1173 	/*
1174 	 * get size of the entire block (overhead and all)
1175 	 */
1176 	/* LINTED improper alignment */
1177 	blk = BLOCK(p);			/* back up to get length word */
1178 	unprotect(blk);
1179 	blksize = SIZE(blk);
1180 	CLRBITS01(blksize);
1181 
1182 	/*
1183 	 * locate the proper alignment boundary within the block.
1184 	 */
1185 	x = (size_t)p;
1186 	if (x % align != 0)
1187 		x += align - (x % align);
1188 	alignedp = (TREE *)x;
1189 	/* LINTED improper alignment */
1190 	aligned_blk = BLOCK(alignedp);
1191 
1192 	/*
1193 	 * Check out the space to the left of the alignment
1194 	 * boundary, and split off a fragment if necessary.
1195 	 */
1196 	frag_size = (size_t)aligned_blk - (size_t)blk;
1197 	if (frag_size != 0) {
1198 		/*
1199 		 * Create a fragment to the left of the aligned block.
1200 		 */
1201 		if (frag_size < MINSIZE + WORDSIZE) {
1202 			/*
1203 			 * Not enough space. So make the split
1204 			 * at the other end of the alignment unit.
1205 			 * We know this yields enough space, because
1206 			 * we forced align >= MINSIZE + WORDSIZE above.
1207 			 */
1208 			frag_size += align;
1209 			/* LINTED improper alignment */
1210 			aligned_blk = nextblk(aligned_blk, align);
1211 		}
1212 		blksize -= frag_size;
1213 		SIZE(aligned_blk) = blksize | BIT0;
1214 		frag_size -= WORDSIZE;
1215 		SIZE(blk) = frag_size | BIT0 | ISBIT1(SIZE(blk));
1216 		free_unlocked(DATA(blk));
1217 		/*
1218 		 * free_unlocked(DATA(blk)) has the side-effect of calling
1219 		 * protect() on the block following blk, that is, aligned_blk.
1220 		 * We recover from this by unprotect()ing it here.
1221 		 */
1222 		unprotect(aligned_blk);
1223 	}
1224 
1225 	/*
1226 	 * Is there a (sufficiently large) fragment to the
1227 	 * right of the aligned block?
1228 	 */
1229 	frag_size = blksize - nbytes;
1230 	if (frag_size >= MINSIZE + WORDSIZE) {
1231 		/*
1232 		 * split and free a fragment on the right
1233 		 */
1234 		blksize = SIZE(aligned_blk);
1235 		SIZE(aligned_blk) = nbytes;
1236 		/* LINTED improper alignment */
1237 		blk = NEXT(aligned_blk);
1238 		SETOLD01(SIZE(aligned_blk), blksize);
1239 		frag_size -= WORDSIZE;
1240 		SIZE(blk) = frag_size | BIT0;
1241 		free_unlocked(DATA(blk));
1242 	}
1243 	copy_pattern(LIVEPAT, aligned_blk);
1244 	protect(aligned_blk);
1245 	(void) mutex_unlock(&__watch_malloc_lock);
1246 	return (DATA(aligned_blk));
1247 }
1248 
1249 void *
1250 valloc(size_t size)
1251 {
1252 	static unsigned pagesize;
1253 	if (!pagesize)
1254 		pagesize = _sysconf(_SC_PAGESIZE);
1255 	return (memalign(pagesize, size));
1256 }
1257 
1258 void *
1259 calloc(size_t num, size_t size)
1260 {
1261 	void *mp;
1262 	size_t total;
1263 
1264 	total = num * size;
1265 
1266 	/* check for overflow */
1267 	if (num != 0 && total / num != size) {
1268 		errno = ENOMEM;
1269 		return (NULL);
1270 	}
1271 	if ((mp = malloc(total)) != NULL)
1272 		(void) memset(mp, 0, total);
1273 	return (mp);
1274 }
1275 
1276 /* ARGSUSED1 */
1277 void
1278 cfree(void *p, size_t num, size_t size)
1279 {
1280 	free(p);
1281 }
1282 
1283 typedef struct {
1284 	long cmd;
1285 	prwatch_t prwatch;
1286 } ctl_t;
1287 
1288 static pid_t my_pid = 0;	/* to check for whether we fork()d */
1289 static int dont_watch = 0;
1290 static int do_stop = 0;
1291 static int ctlfd = -1;
1292 struct stat ctlstatb;
1293 static int wflags = WA_WRITE;
1294 
1295 static void
1296 init_watch()
1297 {
1298 	char str[80];
1299 	char *s;
1300 
1301 	my_pid = getpid();
1302 
1303 	dont_watch = 1;
1304 
1305 	if ((s = getenv("MALLOC_DEBUG")) == NULL)
1306 		return;
1307 
1308 	s = strncpy(str, s, sizeof (str));
1309 	while (s != NULL) {
1310 		char *e = strchr(s, ',');
1311 		if (e)
1312 			*e++ = '\0';
1313 		if (strcmp(s, "STOP") == 0)
1314 			do_stop = 1;
1315 		else if (strcmp(s, "WATCH") == 0)
1316 			dont_watch = 0;
1317 		else if (strcmp(s, "RW") == 0) {
1318 			dont_watch = 0;
1319 			wflags = WA_READ|WA_WRITE;
1320 		}
1321 		s = e;
1322 	}
1323 
1324 	if (dont_watch)
1325 		return;
1326 
1327 	if ((ctlfd = open("/proc/self/ctl", O_WRONLY)) < 0 ||
1328 	    fstat(ctlfd, &ctlstatb) != 0) {
1329 		if (ctlfd >= 0)
1330 			(void) close(ctlfd);
1331 		ctlfd = -1;
1332 		dont_watch = 1;
1333 		return;
1334 	}
1335 	/* close-on-exec */
1336 	(void) fcntl(ctlfd, F_SETFD, 1);
1337 
1338 	if (do_stop) {
1339 		int pfd;
1340 		pstatus_t pstatus;
1341 		struct {
1342 			long cmd;
1343 			fltset_t fltset;
1344 		} ctl;
1345 
1346 		/*
1347 		 * Play together with some /proc controller
1348 		 * that has set other stop-on-fault flags.
1349 		 */
1350 		premptyset(&ctl.fltset);
1351 		if ((pfd = open("/proc/self/status", O_RDONLY)) >= 0) {
1352 			if (read(pfd, &pstatus, sizeof (pstatus))
1353 			    == sizeof (pstatus))
1354 				ctl.fltset = pstatus.pr_flttrace;
1355 			(void) close(pfd);
1356 		}
1357 		praddset(&ctl.fltset, FLTWATCH);
1358 		ctl.cmd = PCSFAULT;
1359 		(void) write(ctlfd, &ctl, sizeof (ctl));
1360 	}
1361 }
1362 
1363 static int
1364 nowatch()
1365 {
1366 	struct stat statb;
1367 
1368 	if (dont_watch)
1369 		return (1);
1370 	if (ctlfd < 0)	/* first time */
1371 		init_watch();
1372 	else if (fstat(ctlfd, &statb) != 0 ||
1373 	    statb.st_dev != ctlstatb.st_dev ||
1374 	    statb.st_ino != ctlstatb.st_ino) {
1375 		/*
1376 		 * Someone closed our file descriptor.
1377 		 * Just open another one.
1378 		 */
1379 		if ((ctlfd = open("/proc/self/ctl", O_WRONLY)) < 0 ||
1380 		    fstat(ctlfd, &ctlstatb) != 0) {
1381 			if (ctlfd >= 0)
1382 				(void) close(ctlfd);
1383 			ctlfd = -1;
1384 			dont_watch = 1;
1385 			return (1);
1386 		}
1387 		/* close-on-exec */
1388 		(void) fcntl(ctlfd, F_SETFD, 1);
1389 	}
1390 	if (my_pid != getpid()) {
1391 		/*
1392 		 * We fork()d since the last call to the allocator.
1393 		 * watchpoints are not inherited across fork().
1394 		 * XXX: how to recover from this ???
1395 		 */
1396 		dont_watch = 1;
1397 		(void) close(ctlfd);
1398 		ctlfd = -1;
1399 	}
1400 	return (dont_watch);
1401 }
1402 
1403 static void
1404 protect(TREE *tp)
1405 {
1406 	ctl_t ctl;
1407 	size_t size, sz;
1408 
1409 	if (nowatch())
1410 		return;
1411 	if (tp == NULL || DATA(tp) == Baddr)
1412 		return;
1413 
1414 	sz = size = SIZE(tp);
1415 	CLRBITS01(size);
1416 	if (size == 0)
1417 		return;
1418 	if (ISBIT0(sz))		/* block is busy, protect only the head */
1419 		size = 0;
1420 	ctl.cmd = PCWATCH;
1421 	ctl.prwatch.pr_vaddr = (uintptr_t)tp;
1422 	ctl.prwatch.pr_size = size + WORDSIZE;
1423 	ctl.prwatch.pr_wflags = wflags;
1424 	(void) write(ctlfd, &ctl, sizeof (ctl));
1425 }
1426 
1427 static void
1428 unprotect(TREE *tp)
1429 {
1430 	ctl_t ctl;
1431 
1432 	if (nowatch())
1433 		return;
1434 	if (tp == NULL || DATA(tp) == Baddr)
1435 		return;
1436 
1437 	ctl.cmd = PCWATCH;
1438 	ctl.prwatch.pr_vaddr = (uintptr_t)tp;
1439 	ctl.prwatch.pr_size = WORDSIZE;		/* size is arbitrary */
1440 	ctl.prwatch.pr_wflags = 0;		/* clear the watched area */
1441 	(void) write(ctlfd, &ctl, sizeof (ctl));
1442 }
1443 
1444 static void
1445 malloc_prepare()
1446 {
1447 	(void) mutex_lock(&__watch_malloc_lock);
1448 }
1449 
1450 static void
1451 malloc_release()
1452 {
1453 	(void) mutex_unlock(&__watch_malloc_lock);
1454 }
1455 
1456 #pragma init(malloc_init)
1457 static void
1458 malloc_init(void)
1459 {
1460 	(void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
1461 }
1462