xref: /illumos-gate/usr/src/lib/watchmalloc/common/malloc.c (revision 7a6d80f1660abd4755c68cbd094d4a914681d26e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 /*
31  *	Memory management: malloc(), realloc(), free(), memalign().
32  *
33  *	The following #-parameters may be redefined:
34  *	GETCORE: a function to get more core memory.
35  *		GETCORE(0) is assumed to return the next available
36  *		address. Default is 'sbrk'.
37  *	ERRCORE: the error code as returned by GETCORE.
38  *		Default is ((char *)(-1)).
39  *	CORESIZE: a desired unit (measured in bytes) to be used
40  *		with GETCORE. Default is (1024*ALIGN).
41  *
42  *	This algorithm is based on a best fit strategy with lists of
43  *	free elts maintained in a self-adjusting binary tree. Each list
44  *	contains all elts of the same size. The tree is ordered by size.
45  *	For results on self-adjusting trees, see the paper:
46  *		Self-Adjusting Binary Trees,
47  *		DD Sleator & RE Tarjan, JACM 1985.
48  *
49  *	The header of a block contains the size of the data part in bytes.
50  *	Since the size of a block is 0%4, the low two bits of the header
51  *	are free and used as follows:
52  *
53  *		BIT0:	1 for busy (block is in use), 0 for free.
54  *		BIT1:	if the block is busy, this bit is 1 if the
55  *			preceding block in contiguous memory is free.
56  *			Otherwise, it is always 0.
57  */
58 
59 #include "mallint.h"
60 
61 static	mutex_t	__watch_malloc_lock = DEFAULTMUTEX;
62 
63 static	TREE	*Root;		/* root of the free tree */
64 static	TREE	*Bottom;	/* the last free chunk in the arena */
65 static	char	*Baddr;		/* current high address of the arena */
66 
67 static	void	t_delete(TREE *);
68 static	void	t_splay(TREE *);
69 static	void	realfree(void *);
70 static	void	*malloc_unlocked(size_t);
71 static	void	free_unlocked(void *);
72 static	TREE	*morecore(size_t);
73 
74 static	void	protect(TREE *);
75 static	void	unprotect(TREE *);
76 
77 #define	FREEPAT	0
78 #define	LIVEPAT	1
79 
80 /*
81  * Patterns to be copied into freed blocks and allocated blocks.
82  * 0xfeedbeef and 0xfeedface are invalid pointer values in all programs.
83  */
84 static	uint64_t	patterns[2] = {
85 	0xdeadbeefdeadbeefULL,	/* pattern in a freed block */
86 	0xbaddcafebaddcafeULL	/* pattern in an allocated block */
87 };
88 
89 static void
90 copy_pattern(int pat, TREE *tp)
91 {
92 	uint64_t pattern = patterns[pat];
93 	size_t sz = SIZE(tp) / sizeof (uint64_t);
94 	/* LINTED improper alignment */
95 	uint64_t *datap = (uint64_t *)DATA(tp);
96 
97 	while (sz--)
98 		*datap++ = pattern;
99 }
100 
101 /*
102  * Keep lists of small blocks, LIFO order.
103  */
104 static	TREE	*List[MINSIZE/WORDSIZE-1];
105 static	TREE	*Last[MINSIZE/WORDSIZE-1];
106 
107 /* number of blocks to get at one time */
108 #define	NPS	(WORDSIZE*8)
109 
110 static void *
111 smalloc(size_t size)
112 {
113 	TREE	*tp;
114 	size_t	i;
115 
116 	ASSERT(size % WORDSIZE == 0);
117 	/* want to return a unique pointer on malloc(0) */
118 	if (size == 0)
119 		size = WORDSIZE;
120 
121 	/* list to use */
122 	i = size / WORDSIZE - 1;
123 
124 	if (List[i] == NULL) {
125 		TREE	*np;
126 		int	n;
127 		ASSERT((size + WORDSIZE) * NPS >= MINSIZE);
128 
129 		/* get NPS of these block types */
130 		if ((np = malloc_unlocked((size + WORDSIZE)*NPS)) == NULL)
131 			return (NULL);
132 
133 		/* make them into a link list */
134 		for (n = 0, List[i] = np; n < NPS; ++n) {
135 			tp = np;
136 			SIZE(tp) = size;
137 			copy_pattern(FREEPAT, tp);
138 			if (n == NPS - 1) {
139 				Last[i] = tp;
140 				np = NULL;
141 			} else {
142 				/* LINTED improper alignment */
143 				np = NEXT(tp);
144 			}
145 			AFTER(tp) = np;
146 			protect(tp);
147 		}
148 	}
149 
150 	/* allocate from the head of the queue */
151 	tp = List[i];
152 	unprotect(tp);
153 	if ((List[i] = AFTER(tp)) == NULL)
154 		Last[i] = NULL;
155 	copy_pattern(LIVEPAT, tp);
156 	SETBIT0(SIZE(tp));
157 	protect(tp);
158 	return (DATA(tp));
159 }
160 
161 void *
162 malloc(size_t size)
163 {
164 	void	*ret;
165 	(void) mutex_lock(&__watch_malloc_lock);
166 	ret = malloc_unlocked(size);
167 	(void) mutex_unlock(&__watch_malloc_lock);
168 	return (ret);
169 }
170 
171 static void *
172 malloc_unlocked(size_t size)
173 {
174 	size_t	n;
175 	TREE	*tp, *sp, *tmp;
176 
177 	COUNT(nmalloc);
178 	ASSERT(WORDSIZE == ALIGN);
179 
180 	/* check for size that could overflow calculations */
181 	if (size > MAX_MALLOC) {
182 		errno = ENOMEM;
183 		return (NULL);
184 	}
185 	/* make sure that size is 0 mod ALIGN */
186 	ROUND(size);
187 
188 	/* small blocks */
189 	if (size < MINSIZE)
190 		return (smalloc(size));
191 
192 	/* search for an elt of the right size */
193 	sp = NULL;
194 	n = 0;
195 	if (Root) {
196 		tp = Root;
197 		for (;;) {
198 			unprotect(tp);
199 			if (SIZE(tp) >= size) {	/* branch left */
200 				if (n == 0 || n >= SIZE(tp)) {
201 					sp = tp;
202 					n = SIZE(tp);
203 				}
204 				if ((tmp = LEFT(tp)) != NULL) {
205 					protect(tp);
206 					tp = tmp;
207 				} else {
208 					protect(tp);
209 					break;
210 				}
211 			} else {		/* branch right */
212 				if ((tmp = RIGHT(tp)) != NULL) {
213 					protect(tp);
214 					tp = tmp;
215 				} else {
216 					protect(tp);
217 					break;
218 				}
219 			}
220 		}
221 
222 		if (sp) {
223 			unprotect(sp);
224 			t_delete(sp);
225 		} else if (tp != Root) {
226 			/* make the searched-to element the root */
227 			unprotect(tp);
228 			t_splay(tp);
229 			protect(tp);
230 			Root = tp;
231 		}
232 	}
233 
234 	/* if found none fitted in the tree */
235 	if (sp == NULL) {
236 		if (Bottom) {
237 			unprotect(Bottom);
238 			if (size <= SIZE(Bottom)) {
239 				sp = Bottom;
240 				CLRBITS01(SIZE(sp));
241 			} else {
242 				protect(Bottom);
243 				if ((sp = morecore(size)) == NULL)
244 					return (NULL);
245 			}
246 		} else {
247 			if ((sp = morecore(size)) == NULL)
248 				return (NULL);
249 		}
250 	}
251 
252 	/* tell the forward neighbor that we're busy */
253 	/* LINTED improper alignment */
254 	tmp = NEXT(sp);
255 	unprotect(tmp);
256 	CLRBIT1(SIZE(tmp));
257 	ASSERT(ISBIT0(SIZE(tmp)));
258 	protect(tmp);
259 
260 leftover:
261 	/* if the leftover is enough for a new free piece */
262 	if ((n = (SIZE(sp) - size)) >= MINSIZE + WORDSIZE) {
263 		n -= WORDSIZE;
264 		SIZE(sp) = size;
265 		/* LINTED improper alignment */
266 		tp = NEXT(sp);
267 		SIZE(tp) = n | BIT0;
268 		realfree(DATA(tp));
269 	} else if (BOTTOM(sp))
270 		Bottom = NULL;
271 
272 	/* return the allocated space */
273 	copy_pattern(LIVEPAT, sp);
274 	SIZE(sp) |= BIT0;
275 	protect(sp);
276 	return (DATA(sp));
277 }
278 
279 /*
280  *	realloc().
281  *	If the block size is increasing, we try forward merging first.
282  *	This is not best-fit but it avoids some data recopying.
283  */
284 void *
285 realloc(void *old, size_t size)
286 {
287 	TREE	*tp, *np;
288 	size_t	ts;
289 	char	*new;
290 
291 	COUNT(nrealloc);
292 
293 	/* check for size that could overflow calculations */
294 	if (size > MAX_MALLOC) {
295 		errno = ENOMEM;
296 		return (NULL);
297 	}
298 
299 	/* pointer to the block */
300 	(void) mutex_lock(&__watch_malloc_lock);
301 	if (old == NULL) {
302 		new = malloc_unlocked(size);
303 		(void) mutex_unlock(&__watch_malloc_lock);
304 		return (new);
305 	}
306 
307 	/* make sure that size is 0 mod ALIGN */
308 	ROUND(size);
309 
310 	/* LINTED improper alignment */
311 	tp = BLOCK(old);
312 	unprotect(tp);
313 	ts = SIZE(tp);
314 
315 	/* if the block was freed, data has been destroyed. */
316 	if (!ISBIT0(ts)) {
317 		/* XXX; complain here! */
318 		protect(tp);
319 		(void) mutex_unlock(&__watch_malloc_lock);
320 		errno = EINVAL;
321 		return (NULL);
322 	}
323 
324 	CLRBITS01(SIZE(tp));
325 	if (size == SIZE(tp)) {	/* nothing to do */
326 		SIZE(tp) = ts;
327 		protect(tp);
328 		(void) mutex_unlock(&__watch_malloc_lock);
329 		return (old);
330 	}
331 
332 	/* special cases involving small blocks */
333 	if (size < MINSIZE || SIZE(tp) < MINSIZE) {
334 		if (size == 0) {
335 			SETOLD01(SIZE(tp), ts);
336 			free_unlocked(old);
337 			(void) mutex_unlock(&__watch_malloc_lock);
338 			return (NULL);
339 		}
340 		goto call_malloc;
341 	}
342 
343 	/* block is increasing in size, try merging the next block */
344 	if (size > SIZE(tp)) {
345 		/* LINTED improper alignment */
346 		np = NEXT(tp);
347 		unprotect(np);
348 		if (ISBIT0(SIZE(np)))
349 			protect(np);
350 		else {
351 			TREE *tmp;
352 			ASSERT(SIZE(np) >= MINSIZE);
353 			ASSERT(!ISBIT1(SIZE(np)));
354 			SIZE(tp) += SIZE(np) + WORDSIZE;
355 			if (np != Bottom)
356 				t_delete(np);
357 			else
358 				Bottom = NULL;
359 			/* LINTED improper alignment */
360 			tmp = NEXT(np);
361 			unprotect(tmp);
362 			CLRBIT1(SIZE(tmp));
363 			protect(tmp);
364 		}
365 
366 		/* not enough & at TRUE end of memory, try extending core */
367 		if (size > SIZE(tp) && BOTTOM(tp) && GETCORE(0) == Baddr) {
368 			Bottom = tp;
369 			protect(Bottom);
370 			if ((tp = morecore(size)) == NULL) {
371 				tp = Bottom;
372 				Bottom = NULL;
373 				unprotect(tp);
374 			}
375 		}
376 	}
377 
378 	/* got enough space to use */
379 	if (size <= SIZE(tp)) {
380 		size_t n;
381 chop_big:
382 		if ((n = (SIZE(tp) - size)) >= MINSIZE + WORDSIZE) {
383 			n -= WORDSIZE;
384 			SIZE(tp) = size;
385 			/* LINTED improper alignment */
386 			np = NEXT(tp);
387 			SIZE(np) = n | BIT0;
388 			realfree(DATA(np));
389 		} else if (BOTTOM(tp))
390 			Bottom = NULL;
391 
392 		/* the previous block may be free */
393 		SETOLD01(SIZE(tp), ts);
394 		protect(tp);
395 		(void) mutex_unlock(&__watch_malloc_lock);
396 		return (old);
397 	}
398 
399 call_malloc:	/* call malloc to get a new block */
400 	SETOLD01(SIZE(tp), ts);
401 	if ((new = malloc_unlocked(size)) != NULL) {
402 		CLRBITS01(ts);
403 		if (ts > size)
404 			ts = size;
405 		(void) memcpy(new, old, ts);
406 		free_unlocked(old);
407 		(void) mutex_unlock(&__watch_malloc_lock);
408 		return (new);
409 	}
410 
411 	/*
412 	 * Attempt special case recovery allocations since malloc() failed:
413 	 *
414 	 * 1. size <= SIZE(tp) < MINSIZE
415 	 *	Simply return the existing block
416 	 * 2. SIZE(tp) < size < MINSIZE
417 	 *	malloc() may have failed to allocate the chunk of
418 	 *	small blocks. Try asking for MINSIZE bytes.
419 	 * 3. size < MINSIZE <= SIZE(tp)
420 	 *	malloc() may have failed as with 2.  Change to
421 	 *	MINSIZE allocation which is taken from the beginning
422 	 *	of the current block.
423 	 * 4. MINSIZE <= SIZE(tp) < size
424 	 *	If the previous block is free and the combination of
425 	 *	these two blocks has at least size bytes, then merge
426 	 *	the two blocks copying the existing contents backwards.
427 	 */
428 	CLRBITS01(SIZE(tp));
429 	if (SIZE(tp) < MINSIZE) {
430 		if (size < SIZE(tp))		/* case 1. */ {
431 			SETOLD01(SIZE(tp), ts);
432 			protect(tp);
433 			(void) mutex_unlock(&__watch_malloc_lock);
434 			return (old);
435 		} else if (size < MINSIZE)	/* case 2. */ {
436 			size = MINSIZE;
437 			goto call_malloc;
438 		}
439 	} else if (size < MINSIZE)		/* case 3. */ {
440 		size = MINSIZE;
441 		goto chop_big;
442 	} else if (ISBIT1(ts)) {
443 		np = LAST(tp);
444 		unprotect(np);
445 		if ((SIZE(np) + SIZE(tp) + WORDSIZE) >= size) {
446 			ASSERT(!ISBIT0(SIZE(np)));
447 			t_delete(np);
448 			SIZE(np) += SIZE(tp) + WORDSIZE;
449 			/*
450 			 * Since the copy may overlap, use memmove().
451 			 */
452 			(void) memmove(DATA(np), old, SIZE(tp));
453 			old = DATA(np);
454 			tp = np;
455 			CLRBIT1(ts);
456 			goto chop_big;
457 		}
458 		protect(np);
459 	}
460 	SETOLD01(SIZE(tp), ts);
461 	protect(tp);
462 	(void) mutex_unlock(&__watch_malloc_lock);
463 	/* malloc() sets errno */
464 	return (NULL);
465 }
466 
467 /*
468  *	realfree().
469  *	Coalescing of adjacent free blocks is done first.
470  *	Then, the new free block is leaf-inserted into the free tree
471  *	without splaying. This strategy does not guarantee the amortized
472  *	O(nlogn) behaviour for the insert/delete/find set of operations
473  *	on the tree. In practice, however, free is much more infrequent
474  *	than malloc/realloc and the tree searches performed by these
475  *	functions adequately keep the tree in balance.
476  */
477 static void
478 realfree(void *old)
479 {
480 	TREE	*tp, *sp, *np, *tmp;
481 	size_t	ts, size;
482 
483 	COUNT(nfree);
484 
485 	/* pointer to the block */
486 	/* LINTED improper alignment */
487 	tp = BLOCK(old);
488 	unprotect(tp);
489 	ts = SIZE(tp);
490 	if (!ISBIT0(ts)) {	/* block is not busy; previously freed? */
491 		protect(tp);	/* force a watchpoint trap */
492 		CLRBIT0(SIZE(tp));
493 		return;
494 	}
495 	CLRBITS01(SIZE(tp));
496 	copy_pattern(FREEPAT, tp);
497 
498 	/* small block, return it to the tail of its queue */
499 	if (SIZE(tp) < MINSIZE) {
500 		ASSERT(SIZE(tp) / WORDSIZE >= 1);
501 		ts = SIZE(tp) / WORDSIZE - 1;
502 		AFTER(tp) = NULL;
503 		protect(tp);
504 		if (List[ts] == NULL) {
505 			List[ts] = tp;
506 			Last[ts] = tp;
507 		} else {
508 			sp = Last[ts];
509 			unprotect(sp);
510 			AFTER(sp) = tp;
511 			protect(sp);
512 			Last[ts] = tp;
513 		}
514 		return;
515 	}
516 
517 	/* see if coalescing with next block is warranted */
518 	/* LINTED improper alignment */
519 	np = NEXT(tp);
520 	unprotect(np);
521 	if (ISBIT0(SIZE(np)))
522 		protect(np);
523 	else {
524 		if (np != Bottom)
525 			t_delete(np);
526 		SIZE(tp) += SIZE(np) + WORDSIZE;
527 	}
528 
529 	/* the same with the preceding block */
530 	if (ISBIT1(ts)) {
531 		np = LAST(tp);
532 		unprotect(np);
533 		ASSERT(!ISBIT0(SIZE(np)));
534 		ASSERT(np != Bottom);
535 		t_delete(np);
536 		SIZE(np) += SIZE(tp) + WORDSIZE;
537 		tp = np;
538 	}
539 
540 	/* initialize tree info */
541 	PARENT(tp) = LEFT(tp) = RIGHT(tp) = LINKFOR(tp) = NULL;
542 
543 	/* set bottom block, or insert in the free tree */
544 	if (BOTTOM(tp))
545 		Bottom = tp;
546 	else {
547 		/* search for the place to insert */
548 		if (Root) {
549 			size = SIZE(tp);
550 			np = Root;
551 			for (;;) {
552 				unprotect(np);
553 				if (SIZE(np) > size) {
554 					if ((tmp = LEFT(np)) != NULL) {
555 						protect(np);
556 						np = tmp;
557 					} else {
558 						LEFT(np) = tp;
559 						PARENT(tp) = np;
560 						protect(np);
561 						break;
562 					}
563 				} else if (SIZE(np) < size) {
564 					if ((tmp = RIGHT(np)) != NULL) {
565 						protect(np);
566 						np = tmp;
567 					} else {
568 						RIGHT(np) = tp;
569 						PARENT(tp) = np;
570 						protect(np);
571 						break;
572 					}
573 				} else {
574 					if ((sp = PARENT(np)) != NULL) {
575 						unprotect(sp);
576 						if (np == LEFT(sp))
577 							LEFT(sp) = tp;
578 						else
579 							RIGHT(sp) = tp;
580 						PARENT(tp) = sp;
581 						protect(sp);
582 					} else
583 						Root = tp;
584 
585 					/* insert to head of list */
586 					if ((sp = LEFT(np)) != NULL) {
587 						unprotect(sp);
588 						PARENT(sp) = tp;
589 						protect(sp);
590 					}
591 					LEFT(tp) = sp;
592 
593 					if ((sp = RIGHT(np)) != NULL) {
594 						unprotect(sp);
595 						PARENT(sp) = tp;
596 						protect(sp);
597 					}
598 					RIGHT(tp) = sp;
599 
600 					/* doubly link list */
601 					LINKFOR(tp) = np;
602 					LINKBAK(np) = tp;
603 					SETNOTREE(np);
604 					protect(np);
605 
606 					break;
607 				}
608 			}
609 		} else {
610 			Root = tp;
611 		}
612 	}
613 
614 	/*
615 	 * Tell next block that this one is free.
616 	 * The first WORD of the next block contains self's address.
617 	 */
618 	/* LINTED improper alignment */
619 	tmp = NEXT(tp);
620 	unprotect(tmp);
621 	/* LINTED improper alignment */
622 	*(SELFP(tp)) = tp;
623 	SETBIT1(SIZE(tmp));
624 	ASSERT(ISBIT0(SIZE(tmp)));
625 	protect(tmp);
626 	protect(tp);
627 }
628 
629 /*
630  * Get more core. Gaps in memory are noted as busy blocks.
631  */
632 static TREE *
633 morecore(size_t size)
634 {
635 	TREE	*tp;
636 	size_t	n, offset, requestsize;
637 	char	*addr;
638 
639 	/* compute new amount of memory to get */
640 	tp = Bottom;
641 	n = size + 2 * WORDSIZE;
642 	addr = GETCORE(0);
643 
644 	if (addr == ERRCORE)
645 		/* errno set by GETCORE sbrk */
646 		return (NULL);
647 
648 	/* need to pad size out so that addr is aligned */
649 	if ((((size_t)addr) % ALIGN) != 0)
650 		offset = ALIGN - (size_t)addr % ALIGN;
651 	else
652 		offset = 0;
653 
654 	if (tp)
655 		unprotect(tp);
656 
657 	/* if not segmented memory, what we need may be smaller */
658 	if (addr == Baddr) {
659 		n -= WORDSIZE;
660 		if (tp != NULL)
661 			n -= SIZE(tp);
662 	}
663 
664 	/* get a multiple of CORESIZE */
665 	n = ((n - 1) / CORESIZE + 1) * CORESIZE;
666 	requestsize = n + offset;
667 
668 	/* check if nsize request could overflow in GETCORE */
669 	if (requestsize > MAX_MALLOC - (size_t)addr) {
670 		if (tp)
671 			protect(tp);
672 		errno = ENOMEM;
673 		return (NULL);
674 	}
675 
676 	if (requestsize > MAX_GETCORE) {
677 		intptr_t	delta;
678 		/*
679 		 * the value required is too big for GETCORE() to deal with
680 		 * in one go, so use GETCORE() at most 2 times instead.
681 		 * Argument to GETCORE() must be multiple of ALIGN.
682 		 * If not, GETCORE(-MAX_GETCORE) will not return brk point
683 		 * to previous value, but will be ALIGN more.
684 		 * This would leave a small hole.
685 		 */
686 		delta = MAX_GETCORE;
687 		while (delta > 0) {
688 			if (GETCORE(delta) == ERRCORE) {
689 				if (tp)
690 					protect(tp);
691 				if (addr != GETCORE(0))
692 					(void) GETCORE(-MAX_GETCORE);
693 				return (NULL);
694 			}
695 			requestsize -= MAX_GETCORE;
696 			delta = requestsize;
697 		}
698 	} else if (GETCORE(requestsize) == ERRCORE) {
699 		if (tp)
700 			protect(tp);
701 		return (NULL);
702 	}
703 
704 	/* contiguous memory */
705 	if (addr == Baddr) {
706 		ASSERT(offset == 0);
707 		if (tp) {
708 			addr = ((char *)tp);
709 			n += SIZE(tp) + 2 * WORDSIZE;
710 		} else {
711 			addr = Baddr - WORDSIZE;
712 			n += WORDSIZE;
713 		}
714 	} else {
715 		addr += offset;
716 	}
717 
718 	/* new bottom address */
719 	Baddr = addr + n;
720 
721 	/* new bottom block */
722 	/* LINTED improper alignment */
723 	tp = ((TREE *)addr);
724 	SIZE(tp) = n - 2 * WORDSIZE;
725 	ASSERT((SIZE(tp) % ALIGN) == 0);
726 
727 	/* reserved the last word to head any noncontiguous memory */
728 	/* LINTED improper alignment */
729 	SETBIT0(SIZE(NEXT(tp)));
730 
731 	/* non-contiguous memory, free old bottom block */
732 	if (Bottom && Bottom != tp) {
733 		SETBIT0(SIZE(Bottom));
734 		realfree(DATA(Bottom));
735 	}
736 
737 	return (tp);
738 }
739 
740 /*
741  * Utility function to avoid protecting a tree node twice.
742  * Return true if tp is in the NULL-terminated array of tree nodes.
743  */
744 static int
745 in_list(TREE *tp, TREE **npp)
746 {
747 	TREE *sp;
748 
749 	while ((sp = *npp++) != NULL)
750 		if (tp == sp)
751 			return (1);
752 	return (0);
753 }
754 
755 /*
756  * Tree rotation functions (BU: bottom-up, TD: top-down).
757  * All functions are entered with the arguments unprotected.
758  * They must return in the same condition, with all other elements
759  * that have been unprotected during the operation re-protected.
760  */
761 static void
762 LEFT1(TREE *x, TREE *y)
763 {
764 	TREE *node[3];
765 	TREE **npp = node;
766 	TREE *tp;
767 
768 	if ((RIGHT(x) = LEFT(y)) != NULL) {
769 		unprotect(*npp++ = RIGHT(x));
770 		PARENT(RIGHT(x)) = x;
771 	}
772 	if ((PARENT(y) = PARENT(x)) != NULL) {
773 		unprotect(*npp++ = PARENT(x));
774 		if (LEFT(PARENT(x)) == x)
775 			LEFT(PARENT(y)) = y;
776 		else
777 			RIGHT(PARENT(y)) = y;
778 	}
779 	LEFT(y) = x;
780 	PARENT(x) = y;
781 
782 	*npp = NULL;
783 	npp = node;
784 	while ((tp = *npp++) != NULL)
785 		if (tp != x && tp != y && !in_list(tp, npp))
786 			protect(tp);
787 }
788 
789 static void
790 RIGHT1(TREE *x, TREE *y)
791 {
792 	TREE *node[3];
793 	TREE **npp = node;
794 	TREE *tp;
795 
796 	if ((LEFT(x) = RIGHT(y)) != NULL) {
797 		unprotect(*npp++ = LEFT(x));
798 		PARENT(LEFT(x)) = x;
799 	}
800 	if ((PARENT(y) = PARENT(x)) != NULL) {
801 		unprotect(*npp++ = PARENT(x));
802 		if (LEFT(PARENT(x)) == x)
803 			LEFT(PARENT(y)) = y;
804 		else
805 			RIGHT(PARENT(y)) = y;
806 	}
807 	RIGHT(y) = x;
808 	PARENT(x) = y;
809 
810 	*npp = NULL;
811 	npp = node;
812 	while ((tp = *npp++) != NULL)
813 		if (tp != x && tp != y && !in_list(tp, npp))
814 			protect(tp);
815 }
816 
817 static void
818 BULEFT2(TREE *x, TREE *y, TREE *z)
819 {
820 	TREE *node[4];
821 	TREE **npp = node;
822 	TREE *tp;
823 
824 	if ((RIGHT(x) = LEFT(y)) != NULL) {
825 		unprotect(*npp++ = RIGHT(x));
826 		PARENT(RIGHT(x)) = x;
827 	}
828 	if ((RIGHT(y) = LEFT(z)) != NULL) {
829 		unprotect(*npp++ = RIGHT(y));
830 		PARENT(RIGHT(y)) = y;
831 	}
832 	if ((PARENT(z) = PARENT(x)) != NULL) {
833 		unprotect(*npp++ = PARENT(x));
834 		if (LEFT(PARENT(x)) == x)
835 			LEFT(PARENT(z)) = z;
836 		else
837 			RIGHT(PARENT(z)) = z;
838 	}
839 	LEFT(z) = y;
840 	PARENT(y) = z;
841 	LEFT(y) = x;
842 	PARENT(x) = y;
843 
844 	*npp = NULL;
845 	npp = node;
846 	while ((tp = *npp++) != NULL)
847 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
848 			protect(tp);
849 }
850 
851 static void
852 BURIGHT2(TREE *x, TREE *y, TREE *z)
853 {
854 	TREE *node[4];
855 	TREE **npp = node;
856 	TREE *tp;
857 
858 	if ((LEFT(x) = RIGHT(y)) != NULL) {
859 		unprotect(*npp++ = LEFT(x));
860 		PARENT(LEFT(x)) = x;
861 	}
862 	if ((LEFT(y) = RIGHT(z)) != NULL) {
863 		unprotect(*npp++ = LEFT(y));
864 		PARENT(LEFT(y)) = y;
865 	}
866 	if ((PARENT(z) = PARENT(x)) != NULL) {
867 		unprotect(*npp++ = PARENT(x));
868 		if (LEFT(PARENT(x)) == x)
869 			LEFT(PARENT(z)) = z;
870 		else
871 			RIGHT(PARENT(z)) = z;
872 	}
873 	RIGHT(z) = y;
874 	PARENT(y) = z;
875 	RIGHT(y) = x;
876 	PARENT(x) = y;
877 
878 	*npp = NULL;
879 	npp = node;
880 	while ((tp = *npp++) != NULL)
881 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
882 			protect(tp);
883 }
884 
885 static void
886 TDLEFT2(TREE *x, TREE *y, TREE *z)
887 {
888 	TREE *node[3];
889 	TREE **npp = node;
890 	TREE *tp;
891 
892 	if ((RIGHT(y) = LEFT(z)) != NULL) {
893 		unprotect(*npp++ = RIGHT(y));
894 		PARENT(RIGHT(y)) = y;
895 	}
896 	if ((PARENT(z) = PARENT(x)) != NULL) {
897 		unprotect(*npp++ = PARENT(x));
898 		if (LEFT(PARENT(x)) == x)
899 			LEFT(PARENT(z)) = z;
900 		else
901 			RIGHT(PARENT(z)) = z;
902 	}
903 	PARENT(x) = z;
904 	LEFT(z) = x;
905 
906 	*npp = NULL;
907 	npp = node;
908 	while ((tp = *npp++) != NULL)
909 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
910 			protect(tp);
911 }
912 
913 #if 0	/* Not used, for now */
914 static void
915 TDRIGHT2(TREE *x, TREE *y, TREE *z)
916 {
917 	TREE *node[3];
918 	TREE **npp = node;
919 	TREE *tp;
920 
921 	if ((LEFT(y) = RIGHT(z)) != NULL) {
922 		unprotect(*npp++ = LEFT(y));
923 		PARENT(LEFT(y)) = y;
924 	}
925 	if ((PARENT(z) = PARENT(x)) != NULL) {
926 		unprotect(*npp++ = PARENT(x));
927 		if (LEFT(PARENT(x)) == x)
928 			LEFT(PARENT(z)) = z;
929 		else
930 			RIGHT(PARENT(z)) = z;
931 	}
932 	PARENT(x) = z;
933 	RIGHT(z) = x;
934 
935 	*npp = NULL;
936 	npp = node;
937 	while ((tp = *npp++) != NULL)
938 		if (tp != x && tp != y && tp != z && !in_list(tp, npp))
939 			protect(tp);
940 }
941 #endif
942 
943 /*
944  *	Delete a tree element
945  */
946 static void
947 t_delete(TREE *op)
948 {
949 	TREE *tp, *sp, *gp;
950 
951 	/* if this is a non-tree node */
952 	if (ISNOTREE(op)) {
953 		tp = LINKBAK(op);
954 		unprotect(tp);
955 		if ((sp = LINKFOR(op)) != NULL) {
956 			unprotect(sp);
957 			LINKBAK(sp) = tp;
958 			protect(sp);
959 		}
960 		LINKFOR(tp) = sp;
961 		protect(tp);
962 		return;
963 	}
964 
965 	/* make op the root of the tree */
966 	if (PARENT(op))
967 		t_splay(op);
968 
969 	/* if this is the start of a list */
970 	if ((tp = LINKFOR(op)) != NULL) {
971 		unprotect(tp);
972 		PARENT(tp) = NULL;
973 		if ((sp = LEFT(op)) != NULL) {
974 			unprotect(sp);
975 			PARENT(sp) = tp;
976 			protect(sp);
977 		}
978 		LEFT(tp) = sp;
979 
980 		if ((sp = RIGHT(op)) != NULL) {
981 			unprotect(sp);
982 			PARENT(sp) = tp;
983 			protect(sp);
984 		}
985 		RIGHT(tp) = sp;
986 
987 		Root = tp;
988 		protect(tp);
989 		return;
990 	}
991 
992 	/* if op has a non-null left subtree */
993 	if ((tp = LEFT(op)) != NULL) {
994 		unprotect(tp);
995 		PARENT(tp) = NULL;
996 		if (RIGHT(op)) {
997 			/* make the right-end of the left subtree its root */
998 			while ((sp = RIGHT(tp)) != NULL) {
999 				unprotect(sp);
1000 				if ((gp = RIGHT(sp)) != NULL) {
1001 					unprotect(gp);
1002 					TDLEFT2(tp, sp, gp);
1003 					protect(sp);
1004 					protect(tp);
1005 					tp = gp;
1006 				} else {
1007 					LEFT1(tp, sp);
1008 					protect(tp);
1009 					tp = sp;
1010 				}
1011 			}
1012 
1013 			/* hook the right subtree of op to the above elt */
1014 			RIGHT(tp) = sp = RIGHT(op);
1015 			unprotect(sp);
1016 			PARENT(sp) = tp;
1017 			protect(sp);
1018 		}
1019 		protect(tp);
1020 	} else if ((tp = RIGHT(op)) != NULL) {	/* no left subtree */
1021 		unprotect(tp);
1022 		PARENT(tp) = NULL;
1023 		protect(tp);
1024 	}
1025 
1026 	Root = tp;
1027 }
1028 
1029 /*
1030  *	Bottom up splaying (simple version).
1031  *	The basic idea is to roughly cut in half the
1032  *	path from Root to tp and make tp the new root.
1033  */
1034 static void
1035 t_splay(TREE *tp)
1036 {
1037 	TREE *pp, *gp;
1038 
1039 	/* iterate until tp is the root */
1040 	while ((pp = PARENT(tp)) != NULL) {
1041 		unprotect(pp);
1042 		/* grandparent of tp */
1043 		gp = PARENT(pp);
1044 		if (gp)
1045 			unprotect(gp);
1046 
1047 		/* x is a left child */
1048 		if (LEFT(pp) == tp) {
1049 			if (gp && LEFT(gp) == pp) {
1050 				BURIGHT2(gp, pp, tp);
1051 				protect(gp);
1052 			} else {
1053 				if (gp)
1054 					protect(gp);
1055 				RIGHT1(pp, tp);
1056 			}
1057 		} else {
1058 			ASSERT(RIGHT(pp) == tp);
1059 			if (gp && RIGHT(gp) == pp) {
1060 				BULEFT2(gp, pp, tp);
1061 				protect(gp);
1062 			} else {
1063 				if (gp)
1064 					protect(gp);
1065 				LEFT1(pp, tp);
1066 			}
1067 		}
1068 		protect(pp);
1069 		unprotect(tp);	/* just in case */
1070 	}
1071 }
1072 
1073 void
1074 free(void *old)
1075 {
1076 	(void) mutex_lock(&__watch_malloc_lock);
1077 	free_unlocked(old);
1078 	(void) mutex_unlock(&__watch_malloc_lock);
1079 }
1080 
1081 
1082 static void
1083 free_unlocked(void *old)
1084 {
1085 	if (old != NULL)
1086 		realfree(old);
1087 }
1088 
1089 
1090 /*
1091  * memalign(align,nbytes)
1092  *
1093  * Description:
1094  *	Returns a block of specified size on a specified alignment boundary.
1095  *
1096  * Algorithm:
1097  *	Malloc enough to ensure that a block can be aligned correctly.
1098  *	Find the alignment point and return the fragments
1099  *	before and after the block.
1100  *
1101  * Errors:
1102  *	Returns NULL and sets errno as follows:
1103  *	[EINVAL]
1104  *		if nbytes = 0,
1105  *		or if alignment is misaligned,
1106  *		or if the heap has been detectably corrupted.
1107  *	[ENOMEM]
1108  *		if the requested memory could not be allocated.
1109  */
1110 
1111 #define	misaligned(p)		((unsigned)(p) & 3)
1112 		/* 4-byte "word" alignment is considered ok in LP64 */
1113 #define	nextblk(p, size)	((TREE *)((char *)(p) + (size)))
1114 
1115 void *
1116 memalign(size_t align, size_t nbytes)
1117 {
1118 	size_t	reqsize;	/* Num of bytes to get from malloc() */
1119 	TREE	*p;		/* Ptr returned from malloc() */
1120 	TREE	*blk;		/* For addressing fragment blocks */
1121 	size_t	blksize;	/* Current (shrinking) block size */
1122 	TREE	*alignedp;	/* Ptr to properly aligned boundary */
1123 	TREE	*aligned_blk;	/* The block to be returned */
1124 	size_t	frag_size;	/* size of fragments fore and aft */
1125 	size_t	x;
1126 
1127 	/*
1128 	 * check for valid size and alignment parameters
1129 	 * MAX_ALIGN check prevents overflow in later calculation.
1130 	 */
1131 	if (nbytes == 0 || misaligned(align) || align == 0 ||
1132 	    align > MAX_ALIGN) {
1133 		errno = EINVAL;
1134 		return (NULL);
1135 	}
1136 
1137 	/*
1138 	 * Malloc enough memory to guarantee that the result can be
1139 	 * aligned correctly. The worst case is when malloc returns
1140 	 * a block so close to the next alignment boundary that a
1141 	 * fragment of minimum size cannot be created.  In order to
1142 	 * make sure we can handle this, we need to force the
1143 	 * alignment to be at least as large as the minimum frag size
1144 	 * (MINSIZE + WORDSIZE).
1145 	 */
1146 
1147 	/* check for size that could overflow ROUND calculation */
1148 	if (nbytes > MAX_MALLOC) {
1149 		errno = ENOMEM;
1150 		return (NULL);
1151 	}
1152 	ROUND(nbytes);
1153 	if (nbytes < MINSIZE)
1154 		nbytes = MINSIZE;
1155 	ROUND(align);
1156 	while (align < MINSIZE + WORDSIZE)
1157 		align <<= 1;
1158 	reqsize = nbytes + align + (MINSIZE + WORDSIZE);
1159 	/* check for overflow */
1160 	if (reqsize < nbytes) {
1161 		errno = ENOMEM;
1162 		return (NULL);
1163 	}
1164 	p = (TREE *) malloc(reqsize);
1165 	if (p == (TREE *) NULL) {
1166 		/* malloc sets errno */
1167 		return (NULL);
1168 	}
1169 	(void) mutex_lock(&__watch_malloc_lock);
1170 
1171 	/*
1172 	 * get size of the entire block (overhead and all)
1173 	 */
1174 	/* LINTED improper alignment */
1175 	blk = BLOCK(p);			/* back up to get length word */
1176 	unprotect(blk);
1177 	blksize = SIZE(blk);
1178 	CLRBITS01(blksize);
1179 
1180 	/*
1181 	 * locate the proper alignment boundary within the block.
1182 	 */
1183 	x = (size_t)p;
1184 	if (x % align != 0)
1185 		x += align - (x % align);
1186 	alignedp = (TREE *)x;
1187 	/* LINTED improper alignment */
1188 	aligned_blk = BLOCK(alignedp);
1189 
1190 	/*
1191 	 * Check out the space to the left of the alignment
1192 	 * boundary, and split off a fragment if necessary.
1193 	 */
1194 	frag_size = (size_t)aligned_blk - (size_t)blk;
1195 	if (frag_size != 0) {
1196 		/*
1197 		 * Create a fragment to the left of the aligned block.
1198 		 */
1199 		if (frag_size < MINSIZE + WORDSIZE) {
1200 			/*
1201 			 * Not enough space. So make the split
1202 			 * at the other end of the alignment unit.
1203 			 * We know this yields enough space, because
1204 			 * we forced align >= MINSIZE + WORDSIZE above.
1205 			 */
1206 			frag_size += align;
1207 			/* LINTED improper alignment */
1208 			aligned_blk = nextblk(aligned_blk, align);
1209 		}
1210 		blksize -= frag_size;
1211 		SIZE(aligned_blk) = blksize | BIT0;
1212 		frag_size -= WORDSIZE;
1213 		SIZE(blk) = frag_size | BIT0 | ISBIT1(SIZE(blk));
1214 		free_unlocked(DATA(blk));
1215 		/*
1216 		 * free_unlocked(DATA(blk)) has the side-effect of calling
1217 		 * protect() on the block following blk, that is, aligned_blk.
1218 		 * We recover from this by unprotect()ing it here.
1219 		 */
1220 		unprotect(aligned_blk);
1221 	}
1222 
1223 	/*
1224 	 * Is there a (sufficiently large) fragment to the
1225 	 * right of the aligned block?
1226 	 */
1227 	frag_size = blksize - nbytes;
1228 	if (frag_size >= MINSIZE + WORDSIZE) {
1229 		/*
1230 		 * split and free a fragment on the right
1231 		 */
1232 		blksize = SIZE(aligned_blk);
1233 		SIZE(aligned_blk) = nbytes;
1234 		/* LINTED improper alignment */
1235 		blk = NEXT(aligned_blk);
1236 		SETOLD01(SIZE(aligned_blk), blksize);
1237 		frag_size -= WORDSIZE;
1238 		SIZE(blk) = frag_size | BIT0;
1239 		free_unlocked(DATA(blk));
1240 	}
1241 	copy_pattern(LIVEPAT, aligned_blk);
1242 	protect(aligned_blk);
1243 	(void) mutex_unlock(&__watch_malloc_lock);
1244 	return (DATA(aligned_blk));
1245 }
1246 
1247 void *
1248 valloc(size_t size)
1249 {
1250 	static unsigned pagesize;
1251 	if (!pagesize)
1252 		pagesize = _sysconf(_SC_PAGESIZE);
1253 	return (memalign(pagesize, size));
1254 }
1255 
1256 void *
1257 calloc(size_t num, size_t size)
1258 {
1259 	void *mp;
1260 	size_t total;
1261 
1262 	total = num * size;
1263 
1264 	/* check for overflow */
1265 	if (num != 0 && total / num != size) {
1266 		errno = ENOMEM;
1267 		return (NULL);
1268 	}
1269 	if ((mp = malloc(total)) != NULL)
1270 		(void) memset(mp, 0, total);
1271 	return (mp);
1272 }
1273 
1274 /* ARGSUSED1 */
1275 void
1276 cfree(void *p, size_t num, size_t size)
1277 {
1278 	free(p);
1279 }
1280 
1281 typedef struct {
1282 	long cmd;
1283 	prwatch_t prwatch;
1284 } ctl_t;
1285 
1286 static pid_t my_pid = 0;	/* to check for whether we fork()d */
1287 static int dont_watch = 0;
1288 static int do_stop = 0;
1289 static int ctlfd = -1;
1290 struct stat ctlstatb;
1291 static int wflags = WA_WRITE;
1292 
1293 static void
1294 init_watch()
1295 {
1296 	char str[80];
1297 	char *s;
1298 
1299 	my_pid = getpid();
1300 
1301 	dont_watch = 1;
1302 
1303 	if ((s = getenv("MALLOC_DEBUG")) == NULL)
1304 		return;
1305 
1306 	s = strncpy(str, s, sizeof (str));
1307 	while (s != NULL) {
1308 		char *e = strchr(s, ',');
1309 		if (e)
1310 			*e++ = '\0';
1311 		if (strcmp(s, "STOP") == 0)
1312 			do_stop = 1;
1313 		else if (strcmp(s, "WATCH") == 0)
1314 			dont_watch = 0;
1315 		else if (strcmp(s, "RW") == 0) {
1316 			dont_watch = 0;
1317 			wflags = WA_READ|WA_WRITE;
1318 		}
1319 		s = e;
1320 	}
1321 
1322 	if (dont_watch)
1323 		return;
1324 
1325 	if ((ctlfd = open("/proc/self/ctl", O_WRONLY)) < 0 ||
1326 	    fstat(ctlfd, &ctlstatb) != 0) {
1327 		if (ctlfd >= 0)
1328 			(void) close(ctlfd);
1329 		ctlfd = -1;
1330 		dont_watch = 1;
1331 		return;
1332 	}
1333 	/* close-on-exec */
1334 	(void) fcntl(ctlfd, F_SETFD, 1);
1335 
1336 	if (do_stop) {
1337 		int pfd;
1338 		pstatus_t pstatus;
1339 		struct {
1340 			long cmd;
1341 			fltset_t fltset;
1342 		} ctl;
1343 
1344 		/*
1345 		 * Play together with some /proc controller
1346 		 * that has set other stop-on-fault flags.
1347 		 */
1348 		premptyset(&ctl.fltset);
1349 		if ((pfd = open("/proc/self/status", O_RDONLY)) >= 0) {
1350 			if (read(pfd, &pstatus, sizeof (pstatus))
1351 			    == sizeof (pstatus))
1352 				ctl.fltset = pstatus.pr_flttrace;
1353 			(void) close(pfd);
1354 		}
1355 		praddset(&ctl.fltset, FLTWATCH);
1356 		ctl.cmd = PCSFAULT;
1357 		(void) write(ctlfd, &ctl, sizeof (ctl));
1358 	}
1359 }
1360 
1361 static int
1362 nowatch()
1363 {
1364 	struct stat statb;
1365 
1366 	if (dont_watch)
1367 		return (1);
1368 	if (ctlfd < 0)	/* first time */
1369 		init_watch();
1370 	else if (fstat(ctlfd, &statb) != 0 ||
1371 	    statb.st_dev != ctlstatb.st_dev ||
1372 	    statb.st_ino != ctlstatb.st_ino) {
1373 		/*
1374 		 * Someone closed our file descriptor.
1375 		 * Just open another one.
1376 		 */
1377 		if ((ctlfd = open("/proc/self/ctl", O_WRONLY)) < 0 ||
1378 		    fstat(ctlfd, &ctlstatb) != 0) {
1379 			if (ctlfd >= 0)
1380 				(void) close(ctlfd);
1381 			ctlfd = -1;
1382 			dont_watch = 1;
1383 			return (1);
1384 		}
1385 		/* close-on-exec */
1386 		(void) fcntl(ctlfd, F_SETFD, 1);
1387 	}
1388 	if (my_pid != getpid()) {
1389 		/*
1390 		 * We fork()d since the last call to the allocator.
1391 		 * watchpoints are not inherited across fork().
1392 		 * XXX: how to recover from this ???
1393 		 */
1394 		dont_watch = 1;
1395 		(void) close(ctlfd);
1396 		ctlfd = -1;
1397 	}
1398 	return (dont_watch);
1399 }
1400 
1401 static void
1402 protect(TREE *tp)
1403 {
1404 	ctl_t ctl;
1405 	size_t size, sz;
1406 
1407 	if (nowatch())
1408 		return;
1409 	if (tp == NULL || DATA(tp) == Baddr)
1410 		return;
1411 
1412 	sz = size = SIZE(tp);
1413 	CLRBITS01(size);
1414 	if (size == 0)
1415 		return;
1416 	if (ISBIT0(sz))		/* block is busy, protect only the head */
1417 		size = 0;
1418 	ctl.cmd = PCWATCH;
1419 	ctl.prwatch.pr_vaddr = (uintptr_t)tp;
1420 	ctl.prwatch.pr_size = size + WORDSIZE;
1421 	ctl.prwatch.pr_wflags = wflags;
1422 	(void) write(ctlfd, &ctl, sizeof (ctl));
1423 }
1424 
1425 static void
1426 unprotect(TREE *tp)
1427 {
1428 	ctl_t ctl;
1429 
1430 	if (nowatch())
1431 		return;
1432 	if (tp == NULL || DATA(tp) == Baddr)
1433 		return;
1434 
1435 	ctl.cmd = PCWATCH;
1436 	ctl.prwatch.pr_vaddr = (uintptr_t)tp;
1437 	ctl.prwatch.pr_size = WORDSIZE;		/* size is arbitrary */
1438 	ctl.prwatch.pr_wflags = 0;		/* clear the watched area */
1439 	(void) write(ctlfd, &ctl, sizeof (ctl));
1440 }
1441 
1442 static void
1443 malloc_prepare()
1444 {
1445 	(void) mutex_lock(&__watch_malloc_lock);
1446 }
1447 
1448 static void
1449 malloc_release()
1450 {
1451 	(void) mutex_unlock(&__watch_malloc_lock);
1452 }
1453 
1454 #pragma init(malloc_init)
1455 static void
1456 malloc_init(void)
1457 {
1458 	(void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
1459 }
1460