xref: /titanic_41/usr/src/uts/common/os/timers.c (revision 8fc99e42676a23421c75e76660640f9765d693b1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 1982, 1986 Regents of the University of California.
29  * All rights reserved.  The Berkeley software License Agreement
30  * specifies the terms and conditions for redistribution.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/user.h>
35 #include <sys/vnode.h>
36 #include <sys/proc.h>
37 #include <sys/time.h>
38 #include <sys/systm.h>
39 #include <sys/kmem.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpuvar.h>
42 #include <sys/timer.h>
43 #include <sys/debug.h>
44 #include <sys/sysmacros.h>
45 #include <sys/cyclic.h>
46 
47 static void	realitexpire(void *);
48 static void	realprofexpire(void *);
49 static void	timeval_advance(struct timeval *, struct timeval *);
50 
51 kmutex_t tod_lock;	/* protects time-of-day stuff */
52 
53 /*
54  * Constant to define the minimum interval value of the ITIMER_REALPROF timer.
55  * Value is in microseconds; defaults to 500 usecs.  Setting this value
56  * significantly lower may allow for denial-of-service attacks.
57  */
58 int itimer_realprof_minimum = 500;
59 
/*
 * Compare a timeval (seconds/microseconds) against a timestruc
 * (seconds/nanoseconds) using the relational operator passed as 'cmp'.
 * Seconds are compared first; only when they are equal are the sub-second
 * fields compared, with the microseconds scaled up to nanoseconds.
 */
#define	TVTSCMP(tvp, tsp, cmp)						\
	/* CSTYLED */							\
	((tvp)->tv_sec cmp (tsp)->tv_sec ||				\
	    ((tvp)->tv_sec == (tsp)->tv_sec &&				\
	    /* CSTYLED */						\
	    (tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec))
70 
71 /*
72  * Time of day and interval timer support.
73  *
74  * These routines provide the kernel entry points to get and set
75  * the time-of-day and per-process interval timers.  Subroutines
76  * here provide support for adding and subtracting timeval structures
77  * and decrementing interval timers, optionally reloading the interval
78  * timers when they expire.
79  */
80 
81 /*
82  * SunOS function to generate monotonically increasing time values.
83  */
84 void
uniqtime(struct timeval * tv)85 uniqtime(struct timeval *tv)
86 {
87 	static struct timeval last;
88 	static int last_timechanged;
89 	timestruc_t ts;
90 	time_t sec;
91 	int usec, nsec;
92 
93 	/*
94 	 * protect modification of last
95 	 */
96 	mutex_enter(&tod_lock);
97 	gethrestime(&ts);
98 
99 	/*
100 	 * Fast algorithm to convert nsec to usec -- see hrt2ts()
101 	 * in common/os/timers.c for a full description.
102 	 */
103 	nsec = ts.tv_nsec;
104 	usec = nsec + (nsec >> 2);
105 	usec = nsec + (usec >> 1);
106 	usec = nsec + (usec >> 2);
107 	usec = nsec + (usec >> 4);
108 	usec = nsec - (usec >> 3);
109 	usec = nsec + (usec >> 2);
110 	usec = nsec + (usec >> 3);
111 	usec = nsec + (usec >> 4);
112 	usec = nsec + (usec >> 1);
113 	usec = nsec + (usec >> 6);
114 	usec = usec >> 10;
115 	sec = ts.tv_sec;
116 
117 	/*
118 	 * If the system hres time has been changed since the last time
119 	 * we are called. then all bets are off; just update our
120 	 * local copy of timechanged and accept the reported time as is.
121 	 */
122 	if (last_timechanged != timechanged) {
123 		last_timechanged = timechanged;
124 	}
125 	/*
126 	 * Try to keep timestamps unique, but don't be obsessive about
127 	 * it in the face of large differences.
128 	 */
129 	else if ((sec <= last.tv_sec) &&	/* same or lower seconds, and */
130 	    ((sec != last.tv_sec) ||		/* either different second or */
131 	    (usec <= last.tv_usec)) &&		/* lower microsecond, and */
132 	    ((last.tv_sec - sec) <= 5)) {	/* not way back in time */
133 		sec = last.tv_sec;
134 		usec = last.tv_usec + 1;
135 		if (usec >= MICROSEC) {
136 			usec -= MICROSEC;
137 			sec++;
138 		}
139 	}
140 	last.tv_sec = sec;
141 	last.tv_usec = usec;
142 	mutex_exit(&tod_lock);
143 
144 	tv->tv_sec = sec;
145 	tv->tv_usec = usec;
146 }
147 
/*
 * 32-bit flavor of uniqtime(): obtain a timestamp from uniqtime() and
 * convert it into the caller's timeval32.
 *
 * The kernel exports timestamps in several places, typically for
 * uniqueness or for sequencing.  Truncating to 32 bits is harmless for
 * uniqueness, but sequencing will need more work as 2038 approaches.
 */
void
uniqtime32(struct timeval32 *tv32p)
{
	struct timeval native;

	uniqtime(&native);
	TIMEVAL_TO_TIMEVAL32(tv32p, &native);
}
162 
163 int
gettimeofday(struct timeval * tp)164 gettimeofday(struct timeval *tp)
165 {
166 	struct timeval atv;
167 
168 	if (tp) {
169 		uniqtime(&atv);
170 		if (get_udatamodel() == DATAMODEL_NATIVE) {
171 			if (copyout(&atv, tp, sizeof (atv)))
172 				return (set_errno(EFAULT));
173 		} else {
174 			struct timeval32 tv32;
175 
176 			if (TIMEVAL_OVERFLOW(&atv))
177 				return (set_errno(EOVERFLOW));
178 			TIMEVAL_TO_TIMEVAL32(&tv32, &atv);
179 
180 			if (copyout(&tv32, tp, sizeof (tv32)))
181 				return (set_errno(EFAULT));
182 		}
183 	}
184 	return (0);
185 }
186 
187 int
getitimer(uint_t which,struct itimerval * itv)188 getitimer(uint_t which, struct itimerval *itv)
189 {
190 	int error;
191 
192 	if (get_udatamodel() == DATAMODEL_NATIVE)
193 		error = xgetitimer(which, itv, 0);
194 	else {
195 		struct itimerval kitv;
196 
197 		if ((error = xgetitimer(which, &kitv, 1)) == 0) {
198 			if (ITIMERVAL_OVERFLOW(&kitv)) {
199 				error = EOVERFLOW;
200 			} else {
201 				struct itimerval32 itv32;
202 
203 				ITIMERVAL_TO_ITIMERVAL32(&itv32, &kitv);
204 				if (copyout(&itv32, itv, sizeof (itv32)) != 0)
205 					error = EFAULT;
206 			}
207 		}
208 	}
209 
210 	return (error ? (set_errno(error)) : 0);
211 }
212 
213 int
xgetitimer(uint_t which,struct itimerval * itv,int iskaddr)214 xgetitimer(uint_t which, struct itimerval *itv, int iskaddr)
215 {
216 	struct proc *p = curproc;
217 	struct timeval now;
218 	struct itimerval aitv;
219 	hrtime_t ts, first, interval, remain;
220 
221 	mutex_enter(&p->p_lock);
222 
223 	switch (which) {
224 	case ITIMER_VIRTUAL:
225 	case ITIMER_PROF:
226 		aitv = ttolwp(curthread)->lwp_timer[which];
227 		break;
228 
229 	case ITIMER_REAL:
230 		uniqtime(&now);
231 		aitv = p->p_realitimer;
232 
233 		if (timerisset(&aitv.it_value)) {
234 			/*CSTYLED*/
235 			if (timercmp(&aitv.it_value, &now, <)) {
236 				timerclear(&aitv.it_value);
237 			} else {
238 				timevalsub(&aitv.it_value, &now);
239 			}
240 		}
241 		break;
242 
243 	case ITIMER_REALPROF:
244 		if (curproc->p_rprof_cyclic == CYCLIC_NONE) {
245 			bzero(&aitv, sizeof (aitv));
246 			break;
247 		}
248 
249 		aitv = curproc->p_rprof_timer;
250 
251 		first = tv2hrt(&aitv.it_value);
252 		interval = tv2hrt(&aitv.it_interval);
253 
254 		if ((ts = gethrtime()) < first) {
255 			/*
256 			 * We haven't gone off for the first time; the time
257 			 * remaining is simply the first time we will go
258 			 * off minus the current time.
259 			 */
260 			remain = first - ts;
261 		} else {
262 			if (interval == 0) {
263 				/*
264 				 * This was set as a one-shot, and we've
265 				 * already gone off; there is no time
266 				 * remaining.
267 				 */
268 				remain = 0;
269 			} else {
270 				/*
271 				 * We have a non-zero interval; we need to
272 				 * determine how far we are into the current
273 				 * interval, and subtract that from the
274 				 * interval to determine the time remaining.
275 				 */
276 				remain = interval - ((ts - first) % interval);
277 			}
278 		}
279 
280 		hrt2tv(remain, &aitv.it_value);
281 		break;
282 
283 	default:
284 		mutex_exit(&p->p_lock);
285 		return (EINVAL);
286 	}
287 
288 	mutex_exit(&p->p_lock);
289 
290 	if (iskaddr) {
291 		bcopy(&aitv, itv, sizeof (*itv));
292 	} else {
293 		ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
294 		if (copyout(&aitv, itv, sizeof (*itv)))
295 			return (EFAULT);
296 	}
297 
298 	return (0);
299 }
300 
301 
302 int
setitimer(uint_t which,struct itimerval * itv,struct itimerval * oitv)303 setitimer(uint_t which, struct itimerval *itv, struct itimerval *oitv)
304 {
305 	int error;
306 
307 	if (oitv != NULL)
308 		if ((error = getitimer(which, oitv)) != 0)
309 			return (error);
310 
311 	if (itv == NULL)
312 		return (0);
313 
314 	if (get_udatamodel() == DATAMODEL_NATIVE)
315 		error = xsetitimer(which, itv, 0);
316 	else {
317 		struct itimerval32 itv32;
318 		struct itimerval kitv;
319 
320 		if (copyin(itv, &itv32, sizeof (itv32)))
321 			error = EFAULT;
322 		ITIMERVAL32_TO_ITIMERVAL(&kitv, &itv32);
323 		error = xsetitimer(which, &kitv, 1);
324 	}
325 
326 	return (error ? (set_errno(error)) : 0);
327 }
328 
329 int
xsetitimer(uint_t which,struct itimerval * itv,int iskaddr)330 xsetitimer(uint_t which, struct itimerval *itv, int iskaddr)
331 {
332 	struct itimerval aitv;
333 	struct timeval now;
334 	struct proc *p = curproc;
335 	kthread_t *t;
336 	timeout_id_t tmp_id;
337 	cyc_handler_t hdlr;
338 	cyc_time_t when;
339 	cyclic_id_t cyclic;
340 	hrtime_t ts;
341 	int min;
342 
343 	if (itv == NULL)
344 		return (0);
345 
346 	if (iskaddr) {
347 		bcopy(itv, &aitv, sizeof (aitv));
348 	} else {
349 		ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
350 		if (copyin(itv, &aitv, sizeof (aitv)))
351 			return (EFAULT);
352 	}
353 
354 	if (which == ITIMER_REALPROF) {
355 		min = MAX((int)(cyclic_getres() / (NANOSEC / MICROSEC)),
356 		    itimer_realprof_minimum);
357 	} else {
358 		min = usec_per_tick;
359 	}
360 
361 	if (itimerfix(&aitv.it_value, min) ||
362 	    (itimerfix(&aitv.it_interval, min) && timerisset(&aitv.it_value)))
363 		return (EINVAL);
364 
365 	mutex_enter(&p->p_lock);
366 	switch (which) {
367 	case ITIMER_REAL:
368 		/*
369 		 * The SITBUSY flag prevents conflicts with multiple
370 		 * threads attempting to perform setitimer(ITIMER_REAL)
371 		 * at the same time, even when we drop p->p_lock below.
372 		 * Any blocked thread returns successfully because the
373 		 * effect is the same as if it got here first, finished,
374 		 * and the other thread then came through and destroyed
375 		 * what it did.  We are just protecting the system from
376 		 * malfunctioning due to the race condition.
377 		 */
378 		if (p->p_flag & SITBUSY) {
379 			mutex_exit(&p->p_lock);
380 			return (0);
381 		}
382 		p->p_flag |= SITBUSY;
383 		while ((tmp_id = p->p_itimerid) != 0) {
384 			/*
385 			 * Avoid deadlock in callout_delete (called from
386 			 * untimeout) which may go to sleep (while holding
387 			 * p_lock). Drop p_lock and re-acquire it after
388 			 * untimeout returns. Need to clear p_itimerid
389 			 * while holding p_lock.
390 			 */
391 			p->p_itimerid = 0;
392 			mutex_exit(&p->p_lock);
393 			(void) untimeout(tmp_id);
394 			mutex_enter(&p->p_lock);
395 		}
396 		if (timerisset(&aitv.it_value)) {
397 			uniqtime(&now);
398 			timevaladd(&aitv.it_value, &now);
399 			p->p_itimerid = realtime_timeout(realitexpire,
400 			    p, hzto(&aitv.it_value));
401 		}
402 		p->p_realitimer = aitv;
403 		p->p_flag &= ~SITBUSY;
404 		break;
405 
406 	case ITIMER_REALPROF:
407 		cyclic = p->p_rprof_cyclic;
408 		p->p_rprof_cyclic = CYCLIC_NONE;
409 
410 		mutex_exit(&p->p_lock);
411 
412 		/*
413 		 * We're now going to acquire cpu_lock, remove the old cyclic
414 		 * if necessary, and add our new cyclic.
415 		 */
416 		mutex_enter(&cpu_lock);
417 
418 		if (cyclic != CYCLIC_NONE)
419 			cyclic_remove(cyclic);
420 
421 		if (!timerisset(&aitv.it_value)) {
422 			/*
423 			 * If we were passed a value of 0, we're done.
424 			 */
425 			mutex_exit(&cpu_lock);
426 			return (0);
427 		}
428 
429 		hdlr.cyh_func = realprofexpire;
430 		hdlr.cyh_arg = p;
431 		hdlr.cyh_level = CY_LOW_LEVEL;
432 
433 		when.cyt_when = (ts = gethrtime() + tv2hrt(&aitv.it_value));
434 		when.cyt_interval = tv2hrt(&aitv.it_interval);
435 
436 		if (when.cyt_interval == 0) {
437 			/*
438 			 * Using the same logic as for CLOCK_HIGHRES timers, we
439 			 * set the interval to be INT64_MAX - when.cyt_when to
440 			 * effect a one-shot; see the comment in clock_highres.c
441 			 * for more details on why this works.
442 			 */
443 			when.cyt_interval = INT64_MAX - when.cyt_when;
444 		}
445 
446 		cyclic = cyclic_add(&hdlr, &when);
447 
448 		mutex_exit(&cpu_lock);
449 
450 		/*
451 		 * We have now successfully added the cyclic.  Reacquire
452 		 * p_lock, and see if anyone has snuck in.
453 		 */
454 		mutex_enter(&p->p_lock);
455 
456 		if (p->p_rprof_cyclic != CYCLIC_NONE) {
457 			/*
458 			 * We're racing with another thread establishing an
459 			 * ITIMER_REALPROF interval timer.  We'll let the other
460 			 * thread win (this is a race at the application level,
461 			 * so letting the other thread win is acceptable).
462 			 */
463 			mutex_exit(&p->p_lock);
464 			mutex_enter(&cpu_lock);
465 			cyclic_remove(cyclic);
466 			mutex_exit(&cpu_lock);
467 
468 			return (0);
469 		}
470 
471 		/*
472 		 * Success.  Set our tracking variables in the proc structure,
473 		 * cancel any outstanding ITIMER_PROF, and allocate the
474 		 * per-thread SIGPROF buffers, if possible.
475 		 */
476 		hrt2tv(ts, &aitv.it_value);
477 		p->p_rprof_timer = aitv;
478 		p->p_rprof_cyclic = cyclic;
479 
480 		t = p->p_tlist;
481 		do {
482 			struct itimerval *itvp;
483 
484 			itvp = &ttolwp(t)->lwp_timer[ITIMER_PROF];
485 			timerclear(&itvp->it_interval);
486 			timerclear(&itvp->it_value);
487 
488 			if (t->t_rprof != NULL)
489 				continue;
490 
491 			t->t_rprof =
492 			    kmem_zalloc(sizeof (struct rprof), KM_NOSLEEP);
493 			aston(t);
494 		} while ((t = t->t_forw) != p->p_tlist);
495 
496 		break;
497 
498 	case ITIMER_VIRTUAL:
499 		ttolwp(curthread)->lwp_timer[ITIMER_VIRTUAL] = aitv;
500 		break;
501 
502 	case ITIMER_PROF:
503 		if (p->p_rprof_cyclic != CYCLIC_NONE) {
504 			/*
505 			 * Silently ignore ITIMER_PROF if ITIMER_REALPROF
506 			 * is in effect.
507 			 */
508 			break;
509 		}
510 
511 		ttolwp(curthread)->lwp_timer[ITIMER_PROF] = aitv;
512 		break;
513 
514 	default:
515 		mutex_exit(&p->p_lock);
516 		return (EINVAL);
517 	}
518 	mutex_exit(&p->p_lock);
519 	return (0);
520 }
521 
522 /*
523  * Delete the ITIMER_REALPROF interval timer.
524  * Called only from exec_args() when exec occurs.
525  * The other ITIMER_* interval timers are specified
526  * to be inherited across exec(), so leave them alone.
527  */
528 void
delete_itimer_realprof(void)529 delete_itimer_realprof(void)
530 {
531 	kthread_t *t = curthread;
532 	struct proc *p = ttoproc(t);
533 	klwp_t *lwp = ttolwp(t);
534 	cyclic_id_t cyclic;
535 
536 	mutex_enter(&p->p_lock);
537 
538 	/* we are performing execve(); assert we are single-threaded */
539 	ASSERT(t == p->p_tlist && t == t->t_forw);
540 
541 	if ((cyclic = p->p_rprof_cyclic) == CYCLIC_NONE) {
542 		mutex_exit(&p->p_lock);
543 	} else {
544 		p->p_rprof_cyclic = CYCLIC_NONE;
545 		/*
546 		 * Delete any current instance of SIGPROF.
547 		 */
548 		if (lwp->lwp_cursig == SIGPROF) {
549 			lwp->lwp_cursig = 0;
550 			lwp->lwp_extsig = 0;
551 			if (lwp->lwp_curinfo) {
552 				siginfofree(lwp->lwp_curinfo);
553 				lwp->lwp_curinfo = NULL;
554 			}
555 		}
556 		/*
557 		 * Delete any pending instances of SIGPROF.
558 		 */
559 		sigdelset(&p->p_sig, SIGPROF);
560 		sigdelset(&p->p_extsig, SIGPROF);
561 		sigdelq(p, NULL, SIGPROF);
562 		sigdelset(&t->t_sig, SIGPROF);
563 		sigdelset(&t->t_extsig, SIGPROF);
564 		sigdelq(p, t, SIGPROF);
565 
566 		mutex_exit(&p->p_lock);
567 
568 		/*
569 		 * Remove the ITIMER_REALPROF cyclic.
570 		 */
571 		mutex_enter(&cpu_lock);
572 		cyclic_remove(cyclic);
573 		mutex_exit(&cpu_lock);
574 	}
575 }
576 
577 /*
578  * Real interval timer expired:
579  * send process whose timer expired an alarm signal.
580  * If time is not set up to reload, then just return.
581  * Else compute next time timer should go off which is > current time.
582  * This is where delay in processing this timeout causes multiple
583  * SIGALRM calls to be compressed into one.
584  */
585 static void
realitexpire(void * arg)586 realitexpire(void *arg)
587 {
588 	struct proc *p = arg;
589 	struct timeval *valp = &p->p_realitimer.it_value;
590 	struct timeval *intervalp = &p->p_realitimer.it_interval;
591 #if !defined(_LP64)
592 	clock_t	ticks;
593 #endif
594 
595 	mutex_enter(&p->p_lock);
596 #if !defined(_LP64)
597 	if ((ticks = hzto(valp)) > 1) {
598 		/*
599 		 * If we are executing before we were meant to, it must be
600 		 * because of an overflow in a prior hzto() calculation.
601 		 * In this case, we want to go to sleep for the recalculated
602 		 * number of ticks. For the special meaning of the value "1"
603 		 * see comment in timespectohz().
604 		 */
605 		p->p_itimerid = realtime_timeout(realitexpire, p, ticks);
606 		mutex_exit(&p->p_lock);
607 		return;
608 	}
609 #endif
610 	sigtoproc(p, NULL, SIGALRM);
611 	if (!timerisset(intervalp)) {
612 		timerclear(valp);
613 		p->p_itimerid = 0;
614 	} else {
615 		/* advance timer value past current time */
616 		timeval_advance(valp, intervalp);
617 		p->p_itimerid = realtime_timeout(realitexpire, p, hzto(valp));
618 	}
619 	mutex_exit(&p->p_lock);
620 }
621 
622 /*
623  * Real time profiling interval timer expired:
624  * Increment microstate counters for each lwp in the process
625  * and ensure that running lwps are kicked into the kernel.
626  * If time is not set up to reload, then just return.
627  * Else compute next time timer should go off which is > current time,
628  * as above.
629  */
630 static void
realprofexpire(void * arg)631 realprofexpire(void *arg)
632 {
633 	struct proc *p = arg;
634 	kthread_t *t;
635 
636 	mutex_enter(&p->p_lock);
637 	if (p->p_rprof_cyclic == CYCLIC_NONE ||
638 	    (t = p->p_tlist) == NULL) {
639 		mutex_exit(&p->p_lock);
640 		return;
641 	}
642 	do {
643 		int mstate;
644 
645 		/*
646 		 * Attempt to allocate the SIGPROF buffer, but don't sleep.
647 		 */
648 		if (t->t_rprof == NULL)
649 			t->t_rprof = kmem_zalloc(sizeof (struct rprof),
650 			    KM_NOSLEEP);
651 		if (t->t_rprof == NULL)
652 			continue;
653 
654 		thread_lock(t);
655 		switch (t->t_state) {
656 		case TS_SLEEP:
657 			/*
658 			 * Don't touch the lwp is it is swapped out.
659 			 */
660 			if (!(t->t_schedflag & TS_LOAD)) {
661 				mstate = LMS_SLEEP;
662 				break;
663 			}
664 			switch (mstate = ttolwp(t)->lwp_mstate.ms_prev) {
665 			case LMS_TFAULT:
666 			case LMS_DFAULT:
667 			case LMS_KFAULT:
668 			case LMS_USER_LOCK:
669 				break;
670 			default:
671 				mstate = LMS_SLEEP;
672 				break;
673 			}
674 			break;
675 		case TS_RUN:
676 		case TS_WAIT:
677 			mstate = LMS_WAIT_CPU;
678 			break;
679 		case TS_ONPROC:
680 			switch (mstate = t->t_mstate) {
681 			case LMS_USER:
682 			case LMS_SYSTEM:
683 			case LMS_TRAP:
684 				break;
685 			default:
686 				mstate = LMS_SYSTEM;
687 				break;
688 			}
689 			break;
690 		default:
691 			mstate = t->t_mstate;
692 			break;
693 		}
694 		t->t_rprof->rp_anystate = 1;
695 		t->t_rprof->rp_state[mstate]++;
696 		aston(t);
697 		/*
698 		 * force the thread into the kernel
699 		 * if it is not already there.
700 		 */
701 		if (t->t_state == TS_ONPROC && t->t_cpu != CPU)
702 			poke_cpu(t->t_cpu->cpu_id);
703 		thread_unlock(t);
704 	} while ((t = t->t_forw) != p->p_tlist);
705 
706 	mutex_exit(&p->p_lock);
707 }
708 
709 /*
710  * Advances timer value past the current time of day.  See the detailed
711  * comment for this logic in realitsexpire(), above.
712  */
713 static void
timeval_advance(struct timeval * valp,struct timeval * intervalp)714 timeval_advance(struct timeval *valp, struct timeval *intervalp)
715 {
716 	int cnt2nth;
717 	struct timeval interval2nth;
718 
719 	for (;;) {
720 		interval2nth = *intervalp;
721 		for (cnt2nth = 0; ; cnt2nth++) {
722 			timevaladd(valp, &interval2nth);
723 			/*CSTYLED*/
724 			if (TVTSCMP(valp, &hrestime, >))
725 				break;
726 			timevaladd(&interval2nth, &interval2nth);
727 		}
728 		if (cnt2nth == 0)
729 			break;
730 		timevalsub(valp, &interval2nth);
731 	}
732 }
733 
734 /*
735  * Check that a proposed value to load into the .it_value or .it_interval
736  * part of an interval timer is acceptable, and set it to at least a
737  * specified minimal value.
738  */
739 int
itimerfix(struct timeval * tv,int minimum)740 itimerfix(struct timeval *tv, int minimum)
741 {
742 	if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
743 	    tv->tv_usec < 0 || tv->tv_usec >= MICROSEC)
744 		return (EINVAL);
745 	if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < minimum)
746 		tv->tv_usec = minimum;
747 	return (0);
748 }
749 
750 /*
751  * Same as itimerfix, except a) it takes a timespec instead of a timeval and
752  * b) it doesn't truncate based on timeout granularity; consumers of this
753  * interface (e.g. timer_settime()) depend on the passed timespec not being
754  * modified implicitly.
755  */
756 int
itimerspecfix(timespec_t * tv)757 itimerspecfix(timespec_t *tv)
758 {
759 	if (tv->tv_sec < 0 || tv->tv_nsec < 0 || tv->tv_nsec >= NANOSEC)
760 		return (EINVAL);
761 	return (0);
762 }
763 
764 /*
765  * Decrement an interval timer by a specified number
766  * of microseconds, which must be less than a second,
767  * i.e. < 1000000.  If the timer expires, then reload
768  * it.  In this case, carry over (usec - old value) to
769  * reducint the value reloaded into the timer so that
770  * the timer does not drift.  This routine assumes
771  * that it is called in a context where the timers
772  * on which it is operating cannot change in value.
773  */
774 int
itimerdecr(struct itimerval * itp,int usec)775 itimerdecr(struct itimerval *itp, int usec)
776 {
777 	if (itp->it_value.tv_usec < usec) {
778 		if (itp->it_value.tv_sec == 0) {
779 			/* expired, and already in next interval */
780 			usec -= itp->it_value.tv_usec;
781 			goto expire;
782 		}
783 		itp->it_value.tv_usec += MICROSEC;
784 		itp->it_value.tv_sec--;
785 	}
786 	itp->it_value.tv_usec -= usec;
787 	usec = 0;
788 	if (timerisset(&itp->it_value))
789 		return (1);
790 	/* expired, exactly at end of interval */
791 expire:
792 	if (timerisset(&itp->it_interval)) {
793 		itp->it_value = itp->it_interval;
794 		itp->it_value.tv_usec -= usec;
795 		if (itp->it_value.tv_usec < 0) {
796 			itp->it_value.tv_usec += MICROSEC;
797 			itp->it_value.tv_sec--;
798 		}
799 	} else
800 		itp->it_value.tv_usec = 0;		/* sec is already 0 */
801 	return (0);
802 }
803 
/*
 * Arithmetic on timevals: timevaladd() adds *t2 into *t1, timevalsub()
 * subtracts it.  Both normalize the result through timevalfix().
 * N.B.: the subtract routine makes no attempt to cope with results that
 * fall before the beginning of time; it simply gets confused.
 * Caveat emptor.
 */
void
timevaladd(struct timeval *t1, struct timeval *t2)
{
	t1->tv_usec = t1->tv_usec + t2->tv_usec;
	t1->tv_sec = t1->tv_sec + t2->tv_sec;
	timevalfix(t1);
}
818 
/*
 * Subtract *t2 from *t1 field by field, then renormalize *t1 with
 * timevalfix().
 */
void
timevalsub(struct timeval *t1, struct timeval *t2)
{
	t1->tv_usec = t1->tv_usec - t2->tv_usec;
	t1->tv_sec = t1->tv_sec - t2->tv_sec;
	timevalfix(t1);
}
826 
827 void
timevalfix(struct timeval * t1)828 timevalfix(struct timeval *t1)
829 {
830 	if (t1->tv_usec < 0) {
831 		t1->tv_sec--;
832 		t1->tv_usec += MICROSEC;
833 	}
834 	if (t1->tv_usec >= MICROSEC) {
835 		t1->tv_sec++;
836 		t1->tv_usec -= MICROSEC;
837 	}
838 }
839 
840 /*
841  * Same as the routines above. These routines take a timespec instead
842  * of a timeval.
843  */
844 void
timespecadd(timespec_t * t1,timespec_t * t2)845 timespecadd(timespec_t *t1, timespec_t *t2)
846 {
847 	t1->tv_sec += t2->tv_sec;
848 	t1->tv_nsec += t2->tv_nsec;
849 	timespecfix(t1);
850 }
851 
852 void
timespecsub(timespec_t * t1,timespec_t * t2)853 timespecsub(timespec_t *t1, timespec_t *t2)
854 {
855 	t1->tv_sec -= t2->tv_sec;
856 	t1->tv_nsec -= t2->tv_nsec;
857 	timespecfix(t1);
858 }
859 
860 void
timespecfix(timespec_t * t1)861 timespecfix(timespec_t *t1)
862 {
863 	if (t1->tv_nsec < 0) {
864 		t1->tv_sec--;
865 		t1->tv_nsec += NANOSEC;
866 	} else {
867 		if (t1->tv_nsec >= NANOSEC) {
868 			t1->tv_sec++;
869 			t1->tv_nsec -= NANOSEC;
870 		}
871 	}
872 }
873 
874 /*
875  * Compute number of hz until specified time.
876  * Used to compute third argument to timeout() from an absolute time.
877  */
878 clock_t
hzto(struct timeval * tv)879 hzto(struct timeval *tv)
880 {
881 	timespec_t ts, now;
882 
883 	ts.tv_sec = tv->tv_sec;
884 	ts.tv_nsec = tv->tv_usec * 1000;
885 	gethrestime_lasttick(&now);
886 
887 	return (timespectohz(&ts, now));
888 }
889 
890 /*
891  * Compute number of hz until specified time for a given timespec value.
892  * Used to compute third argument to timeout() from an absolute time.
893  */
894 clock_t
timespectohz(timespec_t * tv,timespec_t now)895 timespectohz(timespec_t *tv, timespec_t now)
896 {
897 	clock_t	ticks;
898 	time_t	sec;
899 	int	nsec;
900 
901 	/*
902 	 * Compute number of ticks we will see between now and
903 	 * the target time; returns "1" if the destination time
904 	 * is before the next tick, so we always get some delay,
905 	 * and returns LONG_MAX ticks if we would overflow.
906 	 */
907 	sec = tv->tv_sec - now.tv_sec;
908 	nsec = tv->tv_nsec - now.tv_nsec + nsec_per_tick - 1;
909 
910 	if (nsec < 0) {
911 		sec--;
912 		nsec += NANOSEC;
913 	} else if (nsec >= NANOSEC) {
914 		sec++;
915 		nsec -= NANOSEC;
916 	}
917 
918 	ticks = NSEC_TO_TICK(nsec);
919 
920 	/*
921 	 * Compute ticks, accounting for negative and overflow as above.
922 	 * Overflow protection kicks in at about 70 weeks for hz=50
923 	 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
924 	 * kernel :-)
925 	 */
926 	if (sec < 0 || (sec == 0 && ticks < 1))
927 		ticks = 1;			/* protect vs nonpositive */
928 	else if (sec > (LONG_MAX - ticks) / hz)
929 		ticks = LONG_MAX;		/* protect vs overflow */
930 	else
931 		ticks += sec * hz;		/* common case */
932 
933 	return (ticks);
934 }
935 
936 /*
937  * Compute number of hz with the timespec tv specified.
938  * The return type must be 64 bit integer.
939  */
940 int64_t
timespectohz64(timespec_t * tv)941 timespectohz64(timespec_t *tv)
942 {
943 	int64_t ticks;
944 	int64_t sec;
945 	int64_t nsec;
946 
947 	sec = tv->tv_sec;
948 	nsec = tv->tv_nsec + nsec_per_tick - 1;
949 
950 	if (nsec < 0) {
951 		sec--;
952 		nsec += NANOSEC;
953 	} else if (nsec >= NANOSEC) {
954 		sec++;
955 		nsec -= NANOSEC;
956 	}
957 
958 	ticks = NSEC_TO_TICK(nsec);
959 
960 	/*
961 	 * Compute ticks, accounting for negative and overflow as above.
962 	 * Overflow protection kicks in at about 70 weeks for hz=50
963 	 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
964 	 * kernel
965 	 */
966 	if (sec < 0 || (sec == 0 && ticks < 1))
967 		ticks = 1;			/* protect vs nonpositive */
968 	else if (sec > (((~0ULL) >> 1) - ticks) / hz)
969 		ticks = (~0ULL) >> 1;		/* protect vs overflow */
970 	else
971 		ticks += sec * hz;		/* common case */
972 
973 	return (ticks);
974 }
975 
976 /*
977  * hrt2ts(): convert from hrtime_t to timestruc_t.
978  *
979  * All this routine really does is:
980  *
981  *	tsp->sec  = hrt / NANOSEC;
982  *	tsp->nsec = hrt % NANOSEC;
983  *
984  * The black magic below avoids doing a 64-bit by 32-bit integer divide,
985  * which is quite expensive.  There's actually much more going on here than
986  * it might first appear -- don't try this at home.
987  *
988  * For the adventuresome, here's an explanation of how it works.
989  *
990  * Multiplication by a fixed constant is easy -- you just do the appropriate
991  * shifts and adds.  For example, to multiply by 10, we observe that
992  *
993  *	x * 10	= x * (8 + 2)
994  *		= (x * 8) + (x * 2)
995  *		= (x << 3) + (x << 1).
996  *
997  * In general, you can read the algorithm right off the bits: the number 10
998  * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3).
999  *
1000  * Sometimes you can do better.  For example, 15 is 1111 binary, so the normal
1001  * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3).
1002  * But, it's cheaper if you capitalize on the fact that you have a run of ones:
1003  * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0).  [You would never
1004  * actually perform the operation << 0, since it's a no-op; I'm just writing
1005  * it that way for clarity.]
1006  *
1007  * The other way you can win is if you get lucky with the prime factorization
1008  * of your constant.  The number 1,000,000,000, which we have to multiply
1009  * by below, is a good example.  One billion is 111011100110101100101000000000
1010  * in binary.  If you apply the bit-grouping trick, it doesn't buy you very
1011  * much, because it's only a win for groups of three or more equal bits:
1012  *
1013  * 111011100110101100101000000000 = 1000000000000000000000000000000
1014  *				  -  000100011001010011011000000000
1015  *
1016  * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS,
1017  * we have reduced this to 10 shift/add pairs (20 operations) on the RHS.
1018  * This is better, but not great.
1019  *
1020  * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125,
1021  * and multiply by each factor.  Multiplication by 125 is particularly easy,
1022  * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four
1023  * operations.  So, to multiply by 1,000,000,000, we perform three multipli-
1024  * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations.
1025  * This is the algorithm we actually use in both hrt2ts() and ts2hrt().
1026  *
1027  * Division is harder; there is no equivalent of the simple shift-add algorithm
1028  * we used for multiplication.  However, we can convert the division problem
1029  * into a multiplication problem by pre-computing the binary representation
1030  * of the reciprocal of the divisor.  For the case of interest, we have
1031  *
1032  *	1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30,
1033  *
1034  * to 32 bits of precision.  (The notation B-30 means "* 2^-30", just like
1035  * E-18 means "* 10^-18".)
1036  *
1037  * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit
1038  * integer 10001001011100000101111101000001, then normalize (shift) the
1039  * result.  This constant has several large bits runs, so the multiply
1040  * is relatively cheap:
1041  *
1042  *	10001001011100000101111101000001 = 10001001100000000110000001000001
1043  *					 - 00000000000100000000000100000000
1044  *
1045  * Again, you can just read the algorithm right off the bits:
1046  *
1047  *			sec = hrt;
1048  *			sec += (hrt << 6);
1049  *			sec -= (hrt << 8);
1050  *			sec += (hrt << 13);
1051  *			sec += (hrt << 14);
1052  *			sec -= (hrt << 20);
1053  *			sec += (hrt << 23);
1054  *			sec += (hrt << 24);
1055  *			sec += (hrt << 27);
1056  *			sec += (hrt << 31);
1057  *			sec >>= (32 + 30);
1058  *
1059  * Voila!  The only problem is, since hrt is 64 bits, we need to use 96-bit
1060  * arithmetic to perform this calculation.  That's a waste, because ultimately
1061  * we only need the highest 32 bits of the result.
1062  *
1063  * The first thing we do is to realize that we don't need to use all of hrt
1064  * in the calculation.  The lowest 30 bits can contribute at most 1 to the
1065  * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later.
1066  * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t.
1067  * Thus, the only bits of hrt that matter for division are bits 30..61.
1068  * These 32 bits are just the lower-order word of (hrt >> 30).  This brings
1069  * us down from 96-bit math to 64-bit math, and our algorithm becomes:
1070  *
1071  *			tmp = (uint32_t) (hrt >> 30);
1072  *			sec = tmp;
1073  *			sec += (tmp << 6);
1074  *			sec -= (tmp << 8);
1075  *			sec += (tmp << 13);
1076  *			sec += (tmp << 14);
1077  *			sec -= (tmp << 20);
1078  *			sec += (tmp << 23);
1079  *			sec += (tmp << 24);
1080  *			sec += (tmp << 27);
1081  *			sec += (tmp << 31);
1082  *			sec >>= 32;
1083  *
1084  * Next, we're going to reduce this 64-bit computation to a 32-bit
1085  * computation.  We begin by rewriting the above algorithm to use relative
1086  * shifts instead of absolute shifts.  That is, instead of computing
1087  * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally:
1088  * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc:
1089  *
1090  *			tmp = (uint32_t) (hrt >> 30);
1091  *			sec = tmp;
1092  *			tmp <<= 6; sec += tmp;
1093  *			tmp <<= 2; sec -= tmp;
1094  *			tmp <<= 5; sec += tmp;
1095  *			tmp <<= 1; sec += tmp;
1096  *			tmp <<= 6; sec -= tmp;
1097  *			tmp <<= 3; sec += tmp;
1098  *			tmp <<= 1; sec += tmp;
1099  *			tmp <<= 3; sec += tmp;
1100  *			tmp <<= 4; sec += tmp;
1101  *			sec >>= 32;
1102  *
1103  * Now for the final step.  Instead of throwing away the low 32 bits at
1104  * the end, we can throw them away as we go, only keeping the high 32 bits
1105  * of the product at each step.  So, for example, where we now have
1106  *
1107  *			tmp <<= 6; sec = sec + tmp;
1108  * we will instead have
1109  *			tmp <<= 6; sec = (sec + tmp) >> 6;
1110  * which is equivalent to
1111  *			sec = (sec >> 6) + tmp;
1112  *
1113  * The final shift ("sec >>= 32") goes away.
1114  *
1115  * All we're really doing here is long multiplication, just like we learned in
1116  * grade school, except that at each step, we only look at the leftmost 32
1117  * columns.  The cumulative error is, at most, the sum of all the bits we
1118  * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32.
1119  * Thus, the final result ("sec") is correct to +/- 1.
1120  *
1121  * It turns out to be important to keep "sec" positive at each step, because
1122  * we don't want to have to explicitly extend the sign bit.  Therefore,
1123  * starting with the last line of code above, each line that would have read
1124  * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and
1125  * the operators (+ or -) in all previous lines must be toggled accordingly.
1126  * Thus, we end up with:
1127  *
1128  *			tmp = (uint32_t) (hrt >> 30);
1129  *			sec = tmp + (sec >> 6);
1130  *			sec = tmp - (tmp >> 2);
1131  *			sec = tmp - (sec >> 5);
1132  *			sec = tmp + (sec >> 1);
1133  *			sec = tmp - (sec >> 6);
1134  *			sec = tmp - (sec >> 3);
1135  *			sec = tmp + (sec >> 1);
1136  *			sec = tmp + (sec >> 3);
1137  *			sec = tmp + (sec >> 4);
1138  *
1139  * This yields a value for sec that is accurate to +1/-1, so we have two
1140  * cases to deal with.  The mysterious-looking "+ 7" in the code below biases
1141  * the rounding toward zero, so that sec is always less than or equal to
1142  * the correct value.  With this modified code, sec is accurate to +0/-2, with
1143  * the -2 case being very rare in practice.  With this change, we only have to
1144  * deal with one case (sec too small) in the cleanup code.
1145  *
1146  * The other modification we make is to delete the second line above
1147  * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is
1148  * set, and the cleanup code can handle that rare case.  This reduces the
1149  * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases.
1150  *
1151  * Finally, we compute nsec = hrt - (sec * 1,000,000,000).  nsec will always
1152  * be positive (since sec is never too large), and will at most be equal to
1153  * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt.
1154  * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can
1155  * safely assume that nsec fits in 32 bits.  Consequently, when we compute
1156  * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit
1157  * arithmetic and let the high-order bits fall off the end.
1158  *
1159  * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop:
1160  *
1161  *			while (nsec >= NANOSEC) {
1162  *				nsec -= NANOSEC;
1163  *				sec++;
1164  *			}
1165  *
1166  * is guaranteed to complete in at most 4 iterations.  In practice, the loop
1167  * completes in 0 or 1 iteration over 95% of the time.
1168  *
1169  * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about
1170  * 35 usec for software division -- about 20 times faster.
1171  */
1172 void
hrt2ts(hrtime_t hrt,timestruc_t * tsp)1173 hrt2ts(hrtime_t hrt, timestruc_t *tsp)
1174 {
1175 	uint32_t sec, nsec, tmp;
1176 
1177 	tmp = (uint32_t)(hrt >> 30);
1178 	sec = tmp - (tmp >> 2);
1179 	sec = tmp - (sec >> 5);
1180 	sec = tmp + (sec >> 1);
1181 	sec = tmp - (sec >> 6) + 7;
1182 	sec = tmp - (sec >> 3);
1183 	sec = tmp + (sec >> 1);
1184 	sec = tmp + (sec >> 3);
1185 	sec = tmp + (sec >> 4);
1186 	tmp = (sec << 7) - sec - sec - sec;
1187 	tmp = (tmp << 7) - tmp - tmp - tmp;
1188 	tmp = (tmp << 7) - tmp - tmp - tmp;
1189 	nsec = (uint32_t)hrt - (tmp << 9);
1190 	while (nsec >= NANOSEC) {
1191 		nsec -= NANOSEC;
1192 		sec++;
1193 	}
1194 	tsp->tv_sec = (time_t)sec;
1195 	tsp->tv_nsec = nsec;
1196 }
1197 
1198 /*
1199  * Convert from timestruc_t to hrtime_t.
1200  *
1201  * The code below is equivalent to:
1202  *
1203  *	hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec;
1204  *
1205  * but requires no integer multiply.
1206  */
1207 hrtime_t
ts2hrt(const timestruc_t * tsp)1208 ts2hrt(const timestruc_t *tsp)
1209 {
1210 	hrtime_t hrt;
1211 
1212 	hrt = tsp->tv_sec;
1213 	hrt = (hrt << 7) - hrt - hrt - hrt;
1214 	hrt = (hrt << 7) - hrt - hrt - hrt;
1215 	hrt = (hrt << 7) - hrt - hrt - hrt;
1216 	hrt = (hrt << 9) + tsp->tv_nsec;
1217 	return (hrt);
1218 }
1219 
1220 /*
1221  * For the various 32-bit "compatibility" paths in the system.
1222  */
1223 void
hrt2ts32(hrtime_t hrt,timestruc32_t * ts32p)1224 hrt2ts32(hrtime_t hrt, timestruc32_t *ts32p)
1225 {
1226 	timestruc_t ts;
1227 
1228 	hrt2ts(hrt, &ts);
1229 	TIMESPEC_TO_TIMESPEC32(ts32p, &ts);
1230 }
1231 
1232 /*
1233  * If this ever becomes performance critical (ha!), we can borrow the
1234  * code from ts2hrt(), above, to multiply tv_sec by 1,000,000 and the
1235  * straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec by
1236  * 1,000.  For now, we'll opt for readability (besides, the compiler does
1237  * a passable job of optimizing constant multiplication into shifts and adds).
1238  */
1239 hrtime_t
tv2hrt(struct timeval * tvp)1240 tv2hrt(struct timeval *tvp)
1241 {
1242 	return ((hrtime_t)tvp->tv_sec * NANOSEC +
1243 	    (hrtime_t)tvp->tv_usec * (NANOSEC / MICROSEC));
1244 }
1245 
1246 void
hrt2tv(hrtime_t hrt,struct timeval * tvp)1247 hrt2tv(hrtime_t hrt, struct timeval *tvp)
1248 {
1249 	uint32_t sec, nsec, tmp;
1250 	uint32_t q, r, t;
1251 
1252 	tmp = (uint32_t)(hrt >> 30);
1253 	sec = tmp - (tmp >> 2);
1254 	sec = tmp - (sec >> 5);
1255 	sec = tmp + (sec >> 1);
1256 	sec = tmp - (sec >> 6) + 7;
1257 	sec = tmp - (sec >> 3);
1258 	sec = tmp + (sec >> 1);
1259 	sec = tmp + (sec >> 3);
1260 	sec = tmp + (sec >> 4);
1261 	tmp = (sec << 7) - sec - sec - sec;
1262 	tmp = (tmp << 7) - tmp - tmp - tmp;
1263 	tmp = (tmp << 7) - tmp - tmp - tmp;
1264 	nsec = (uint32_t)hrt - (tmp << 9);
1265 	while (nsec >= NANOSEC) {
1266 		nsec -= NANOSEC;
1267 		sec++;
1268 	}
1269 	tvp->tv_sec = (time_t)sec;
1270 /*
1271  * this routine is very similar to hr2ts, but requires microseconds
1272  * instead of nanoseconds, so an interger divide by 1000 routine
1273  * completes the conversion
1274  */
1275 	t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12);
1276 	q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14);
1277 	q = q >> 9;
1278 	r = nsec - q*1000;
1279 	tvp->tv_usec = q + ((r + 24) >> 10);
1280 
1281 }
1282 
1283 int
nanosleep(timespec_t * rqtp,timespec_t * rmtp)1284 nanosleep(timespec_t *rqtp, timespec_t *rmtp)
1285 {
1286 	timespec_t rqtime;
1287 	timespec_t rmtime;
1288 	timespec_t now;
1289 	int timecheck;
1290 	int ret = 1;
1291 	model_t datamodel = get_udatamodel();
1292 
1293 	timecheck = timechanged;
1294 	gethrestime(&now);
1295 
1296 	if (datamodel == DATAMODEL_NATIVE) {
1297 		if (copyin(rqtp, &rqtime, sizeof (rqtime)))
1298 			return (set_errno(EFAULT));
1299 	} else {
1300 		timespec32_t rqtime32;
1301 
1302 		if (copyin(rqtp, &rqtime32, sizeof (rqtime32)))
1303 			return (set_errno(EFAULT));
1304 		TIMESPEC32_TO_TIMESPEC(&rqtime, &rqtime32);
1305 	}
1306 
1307 	if (rqtime.tv_sec < 0 || rqtime.tv_nsec < 0 ||
1308 	    rqtime.tv_nsec >= NANOSEC)
1309 		return (set_errno(EINVAL));
1310 
1311 	if (timerspecisset(&rqtime)) {
1312 		timespecadd(&rqtime, &now);
1313 		mutex_enter(&curthread->t_delay_lock);
1314 		while ((ret = cv_waituntil_sig(&curthread->t_delay_cv,
1315 		    &curthread->t_delay_lock, &rqtime, timecheck)) > 0)
1316 			continue;
1317 		mutex_exit(&curthread->t_delay_lock);
1318 	}
1319 
1320 	if (rmtp) {
1321 		/*
1322 		 * If cv_waituntil_sig() returned due to a signal, and
1323 		 * there is time remaining, then set the time remaining.
1324 		 * Else set time remaining to zero
1325 		 */
1326 		rmtime.tv_sec = rmtime.tv_nsec = 0;
1327 		if (ret == 0) {
1328 			timespec_t delta = rqtime;
1329 
1330 			gethrestime(&now);
1331 			timespecsub(&delta, &now);
1332 			if (delta.tv_sec > 0 || (delta.tv_sec == 0 &&
1333 			    delta.tv_nsec > 0))
1334 				rmtime = delta;
1335 		}
1336 
1337 		if (datamodel == DATAMODEL_NATIVE) {
1338 			if (copyout(&rmtime, rmtp, sizeof (rmtime)))
1339 				return (set_errno(EFAULT));
1340 		} else {
1341 			timespec32_t rmtime32;
1342 
1343 			TIMESPEC_TO_TIMESPEC32(&rmtime32, &rmtime);
1344 			if (copyout(&rmtime32, rmtp, sizeof (rmtime32)))
1345 				return (set_errno(EFAULT));
1346 		}
1347 	}
1348 
1349 	if (ret == 0)
1350 		return (set_errno(EINTR));
1351 	return (0);
1352 }
1353 
/*
 * Routines to convert standard UNIX time (seconds since Jan 1, 1970)
 * into year/month/day/hour/minute/second format, and back again.
 * Note: these routines require tod_lock held to protect cached state.
 *
 * days_thru_month[] holds, for each month, the number of days of the year
 * that precede it.  The table is four rows of 16 entries, indexed by
 * ((year & 3) << 4) + month with month running from 1 to 12; the first
 * row (year & 3 == 0) is the leap-year row.  Entry 13 of each row is the
 * length of the whole year, which stops the month search in utc_to_tod();
 * entries 0, 14 and 15 are zero padding.  The table is only ever read.
 */
static const int days_thru_month[64] = {
	0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0,
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
};
1365 
1366 todinfo_t saved_tod;
1367 int saved_utc = -60;
1368 
1369 todinfo_t
utc_to_tod(time_t utc)1370 utc_to_tod(time_t utc)
1371 {
1372 	long dse, day, month, year;
1373 	todinfo_t tod;
1374 
1375 	ASSERT(MUTEX_HELD(&tod_lock));
1376 
1377 	/*
1378 	 * Note that tod_set_prev() assumes utc will be set to zero in
1379 	 * the case of it being negative.  Consequently, any change made
1380 	 * to this behavior would have to be reflected in that function
1381 	 * as well.
1382 	 */
1383 	if (utc < 0)			/* should never happen */
1384 		utc = 0;
1385 
1386 	saved_tod.tod_sec += utc - saved_utc;
1387 	saved_utc = utc;
1388 	if (saved_tod.tod_sec >= 0 && saved_tod.tod_sec < 60)
1389 		return (saved_tod);	/* only the seconds changed */
1390 
1391 	dse = utc / 86400;		/* days since epoch */
1392 
1393 	tod.tod_sec = utc % 60;
1394 	tod.tod_min = (utc % 3600) / 60;
1395 	tod.tod_hour = (utc % 86400) / 3600;
1396 	tod.tod_dow = (dse + 4) % 7 + 1;	/* epoch was a Thursday */
1397 
1398 	year = dse / 365 + 72;	/* first guess -- always a bit too large */
1399 	do {
1400 		year--;
1401 		day = dse - 365 * (year - 70) - ((year - 69) >> 2);
1402 	} while (day < 0);
1403 
1404 	month = ((year & 3) << 4) + 1;
1405 	while (day >= days_thru_month[month + 1])
1406 		month++;
1407 
1408 	tod.tod_day = day - days_thru_month[month] + 1;
1409 	tod.tod_month = month & 15;
1410 	tod.tod_year = year;
1411 
1412 	saved_tod = tod;
1413 	return (tod);
1414 }
1415 
1416 time_t
tod_to_utc(todinfo_t tod)1417 tod_to_utc(todinfo_t tod)
1418 {
1419 	time_t utc;
1420 	int year = tod.tod_year;
1421 	int month = tod.tod_month + ((year & 3) << 4);
1422 #ifdef DEBUG
1423 	/* only warn once, not each time called */
1424 	static int year_warn = 1;
1425 	static int month_warn = 1;
1426 	static int day_warn = 1;
1427 	static int hour_warn = 1;
1428 	static int min_warn = 1;
1429 	static int sec_warn = 1;
1430 	int days_diff = days_thru_month[month + 1] - days_thru_month[month];
1431 #endif
1432 
1433 	ASSERT(MUTEX_HELD(&tod_lock));
1434 
1435 #ifdef DEBUG
1436 	if (year_warn && (year < 70 || year > 8029)) {
1437 		cmn_err(CE_WARN,
1438 		    "The hardware real-time clock appears to have the "
1439 		    "wrong years value %d -- time needs to be reset\n",
1440 		    year);
1441 		year_warn = 0;
1442 	}
1443 
1444 	if (month_warn && (tod.tod_month < 1 || tod.tod_month > 12)) {
1445 		cmn_err(CE_WARN,
1446 		    "The hardware real-time clock appears to have the "
1447 		    "wrong months value %d -- time needs to be reset\n",
1448 		    tod.tod_month);
1449 		month_warn = 0;
1450 	}
1451 
1452 	if (day_warn && (tod.tod_day < 1 || tod.tod_day > days_diff)) {
1453 		cmn_err(CE_WARN,
1454 		    "The hardware real-time clock appears to have the "
1455 		    "wrong days value %d -- time needs to be reset\n",
1456 		    tod.tod_day);
1457 		day_warn = 0;
1458 	}
1459 
1460 	if (hour_warn && (tod.tod_hour < 0 || tod.tod_hour > 23)) {
1461 		cmn_err(CE_WARN,
1462 		    "The hardware real-time clock appears to have the "
1463 		    "wrong hours value %d -- time needs to be reset\n",
1464 		    tod.tod_hour);
1465 		hour_warn = 0;
1466 	}
1467 
1468 	if (min_warn && (tod.tod_min < 0 || tod.tod_min > 59)) {
1469 		cmn_err(CE_WARN,
1470 		    "The hardware real-time clock appears to have the "
1471 		    "wrong minutes value %d -- time needs to be reset\n",
1472 		    tod.tod_min);
1473 		min_warn = 0;
1474 	}
1475 
1476 	if (sec_warn && (tod.tod_sec < 0 || tod.tod_sec > 59)) {
1477 		cmn_err(CE_WARN,
1478 		    "The hardware real-time clock appears to have the "
1479 		    "wrong seconds value %d -- time needs to be reset\n",
1480 		    tod.tod_sec);
1481 		sec_warn = 0;
1482 	}
1483 #endif
1484 
1485 	utc = (year - 70);		/* next 3 lines: utc = 365y + y/4 */
1486 	utc += (utc << 3) + (utc << 6);
1487 	utc += (utc << 2) + ((year - 69) >> 2);
1488 	utc += days_thru_month[month] + tod.tod_day - 1;
1489 	utc = (utc << 3) + (utc << 4) + tod.tod_hour;	/* 24 * day + hour */
1490 	utc = (utc << 6) - (utc << 2) + tod.tod_min;	/* 60 * hour + min */
1491 	utc = (utc << 6) - (utc << 2) + tod.tod_sec;	/* 60 * min + sec */
1492 
1493 	return (utc);
1494 }
1495