xref: /illumos-gate/usr/src/uts/common/os/timers.c (revision 86ef0a63e1cfa5dc98606efef379365acca98063)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2016 Joyent, Inc.
26  */
27 
28 /*
29  * Copyright (c) 1982, 1986 Regents of the University of California.
30  * All rights reserved.  The Berkeley software License Agreement
31  * specifies the terms and conditions for redistribution.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/user.h>
36 #include <sys/vnode.h>
37 #include <sys/proc.h>
38 #include <sys/time.h>
39 #include <sys/systm.h>
40 #include <sys/kmem.h>
41 #include <sys/cmn_err.h>
42 #include <sys/cpuvar.h>
43 #include <sys/timer.h>
44 #include <sys/debug.h>
45 #include <sys/sysmacros.h>
46 #include <sys/cyclic.h>
47 
48 static void	realitexpire(void *);
49 static void	realprofexpire(void *);
50 static void	timeval_advance(struct timeval *, struct timeval *);
51 
52 kmutex_t tod_lock;	/* protects time-of-day stuff */
53 
54 /*
55  * Constant to define the minimum interval value of the ITIMER_REALPROF timer.
56  * Value is in microseconds; defaults to 500 usecs.  Setting this value
57  * significantly lower may allow for denial-of-service attacks.
58  */
59 int itimer_realprof_minimum = 500;
60 
/*
 * Compare a timeval (tvp) against a timestruc (tsp) using the relational
 * operator passed as cmp.  Seconds decide the result unless they are equal,
 * in which case the microseconds (scaled to nanoseconds) are compared with
 * the nanoseconds.  Both pointer arguments are evaluated more than once.
 */

#define	TVTSCMP(tvp, tsp, cmp)						\
	/* CSTYLED */							\
	(((tvp)->tv_sec cmp (tsp)->tv_sec) ||				\
	(((tvp)->tv_sec == (tsp)->tv_sec) &&				\
	/* CSTYLED */							\
	((tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec)))
71 
72 /*
73  * Time of day and interval timer support.
74  *
75  * These routines provide the kernel entry points to get and set
76  * the time-of-day and per-process interval timers.  Subroutines
77  * here provide support for adding and subtracting timeval structures
78  * and decrementing interval timers, optionally reloading the interval
79  * timers when they expire.
80  */
81 
82 /*
83  * SunOS function to generate monotonically increasing time values.
84  */
85 void
uniqtime(struct timeval * tv)86 uniqtime(struct timeval *tv)
87 {
88 	static struct timeval last;
89 	static int last_timechanged;
90 	timestruc_t ts;
91 	time_t sec;
92 	int usec, nsec;
93 
94 	/*
95 	 * protect modification of last
96 	 */
97 	mutex_enter(&tod_lock);
98 	gethrestime(&ts);
99 
100 	/*
101 	 * Fast algorithm to convert nsec to usec -- see hrt2ts()
102 	 * in common/os/timers.c for a full description.
103 	 */
104 	nsec = ts.tv_nsec;
105 	usec = nsec + (nsec >> 2);
106 	usec = nsec + (usec >> 1);
107 	usec = nsec + (usec >> 2);
108 	usec = nsec + (usec >> 4);
109 	usec = nsec - (usec >> 3);
110 	usec = nsec + (usec >> 2);
111 	usec = nsec + (usec >> 3);
112 	usec = nsec + (usec >> 4);
113 	usec = nsec + (usec >> 1);
114 	usec = nsec + (usec >> 6);
115 	usec = usec >> 10;
116 	sec = ts.tv_sec;
117 
118 	/*
119 	 * If the system hres time has been changed since the last time
120 	 * we are called. then all bets are off; just update our
121 	 * local copy of timechanged and accept the reported time as is.
122 	 */
123 	if (last_timechanged != timechanged) {
124 		last_timechanged = timechanged;
125 	}
126 	/*
127 	 * Try to keep timestamps unique, but don't be obsessive about
128 	 * it in the face of large differences.
129 	 */
130 	else if ((sec <= last.tv_sec) &&	/* same or lower seconds, and */
131 	    ((sec != last.tv_sec) ||		/* either different second or */
132 	    (usec <= last.tv_usec)) &&		/* lower microsecond, and */
133 	    ((last.tv_sec - sec) <= 5)) {	/* not way back in time */
134 		sec = last.tv_sec;
135 		usec = last.tv_usec + 1;
136 		if (usec >= MICROSEC) {
137 			usec -= MICROSEC;
138 			sec++;
139 		}
140 	}
141 	last.tv_sec = sec;
142 	last.tv_usec = usec;
143 	mutex_exit(&tod_lock);
144 
145 	tv->tv_sec = sec;
146 	tv->tv_usec = usec;
147 }
148 
149 /*
150  * Timestamps are exported from the kernel in several places.
151  * Such timestamps are commonly used for either uniqueness or for
152  * sequencing - truncation to 32-bits is fine for uniqueness,
153  * but sequencing is going to take more work as we get closer to 2038!
154  */
155 void
uniqtime32(struct timeval32 * tv32p)156 uniqtime32(struct timeval32 *tv32p)
157 {
158 	struct timeval tv;
159 
160 	uniqtime(&tv);
161 	TIMEVAL_TO_TIMEVAL32(tv32p, &tv);
162 }
163 
164 int
gettimeofday(struct timeval * tp)165 gettimeofday(struct timeval *tp)
166 {
167 	struct timeval atv;
168 
169 	if (tp) {
170 		uniqtime(&atv);
171 		if (get_udatamodel() == DATAMODEL_NATIVE) {
172 			if (copyout(&atv, tp, sizeof (atv)))
173 				return (set_errno(EFAULT));
174 		} else {
175 			struct timeval32 tv32;
176 
177 			if (TIMEVAL_OVERFLOW(&atv))
178 				return (set_errno(EOVERFLOW));
179 			TIMEVAL_TO_TIMEVAL32(&tv32, &atv);
180 
181 			if (copyout(&tv32, tp, sizeof (tv32)))
182 				return (set_errno(EFAULT));
183 		}
184 	}
185 	return (0);
186 }
187 
188 int
getitimer(uint_t which,struct itimerval * itv)189 getitimer(uint_t which, struct itimerval *itv)
190 {
191 	int error;
192 
193 	if (get_udatamodel() == DATAMODEL_NATIVE)
194 		error = xgetitimer(which, itv, 0);
195 	else {
196 		struct itimerval kitv;
197 
198 		if ((error = xgetitimer(which, &kitv, 1)) == 0) {
199 			if (ITIMERVAL_OVERFLOW(&kitv)) {
200 				error = EOVERFLOW;
201 			} else {
202 				struct itimerval32 itv32;
203 
204 				ITIMERVAL_TO_ITIMERVAL32(&itv32, &kitv);
205 				if (copyout(&itv32, itv, sizeof (itv32)) != 0)
206 					error = EFAULT;
207 			}
208 		}
209 	}
210 
211 	return (error ? (set_errno(error)) : 0);
212 }
213 
214 int
xgetitimer(uint_t which,struct itimerval * itv,int iskaddr)215 xgetitimer(uint_t which, struct itimerval *itv, int iskaddr)
216 {
217 	struct proc *p = curproc;
218 	struct timeval now;
219 	struct itimerval aitv;
220 	hrtime_t ts, first, interval, remain;
221 
222 	mutex_enter(&p->p_lock);
223 
224 	switch (which) {
225 	case ITIMER_VIRTUAL:
226 	case ITIMER_PROF:
227 		aitv = ttolwp(curthread)->lwp_timer[which];
228 		break;
229 
230 	case ITIMER_REAL:
231 		uniqtime(&now);
232 		aitv = p->p_realitimer;
233 
234 		if (timerisset(&aitv.it_value)) {
235 			/*CSTYLED*/
236 			if (timercmp(&aitv.it_value, &now, <)) {
237 				timerclear(&aitv.it_value);
238 			} else {
239 				timevalsub(&aitv.it_value, &now);
240 			}
241 		}
242 		break;
243 
244 	case ITIMER_REALPROF:
245 		if (curproc->p_rprof_cyclic == CYCLIC_NONE) {
246 			bzero(&aitv, sizeof (aitv));
247 			break;
248 		}
249 
250 		aitv = curproc->p_rprof_timer;
251 
252 		first = tv2hrt(&aitv.it_value);
253 		interval = tv2hrt(&aitv.it_interval);
254 
255 		if ((ts = gethrtime()) < first) {
256 			/*
257 			 * We haven't gone off for the first time; the time
258 			 * remaining is simply the first time we will go
259 			 * off minus the current time.
260 			 */
261 			remain = first - ts;
262 		} else {
263 			if (interval == 0) {
264 				/*
265 				 * This was set as a one-shot, and we've
266 				 * already gone off; there is no time
267 				 * remaining.
268 				 */
269 				remain = 0;
270 			} else {
271 				/*
272 				 * We have a non-zero interval; we need to
273 				 * determine how far we are into the current
274 				 * interval, and subtract that from the
275 				 * interval to determine the time remaining.
276 				 */
277 				remain = interval - ((ts - first) % interval);
278 			}
279 		}
280 
281 		hrt2tv(remain, &aitv.it_value);
282 		break;
283 
284 	default:
285 		mutex_exit(&p->p_lock);
286 		return (EINVAL);
287 	}
288 
289 	mutex_exit(&p->p_lock);
290 
291 	if (iskaddr) {
292 		bcopy(&aitv, itv, sizeof (*itv));
293 	} else {
294 		ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
295 		if (copyout(&aitv, itv, sizeof (*itv)))
296 			return (EFAULT);
297 	}
298 
299 	return (0);
300 }
301 
302 
303 int
setitimer(uint_t which,struct itimerval * itv,struct itimerval * oitv)304 setitimer(uint_t which, struct itimerval *itv, struct itimerval *oitv)
305 {
306 	int error;
307 
308 	if (oitv != NULL)
309 		if ((error = getitimer(which, oitv)) != 0)
310 			return (error);
311 
312 	if (itv == NULL)
313 		return (0);
314 
315 	if (get_udatamodel() == DATAMODEL_NATIVE)
316 		error = xsetitimer(which, itv, 0);
317 	else {
318 		struct itimerval32 itv32;
319 		struct itimerval kitv;
320 
321 		if (copyin(itv, &itv32, sizeof (itv32)))
322 			error = EFAULT;
323 		ITIMERVAL32_TO_ITIMERVAL(&kitv, &itv32);
324 		error = xsetitimer(which, &kitv, 1);
325 	}
326 
327 	return (error ? (set_errno(error)) : 0);
328 }
329 
330 int
xsetitimer(uint_t which,struct itimerval * itv,int iskaddr)331 xsetitimer(uint_t which, struct itimerval *itv, int iskaddr)
332 {
333 	struct itimerval aitv;
334 	struct timeval now;
335 	struct proc *p = curproc;
336 	kthread_t *t;
337 	timeout_id_t tmp_id;
338 	cyc_handler_t hdlr;
339 	cyc_time_t when;
340 	cyclic_id_t cyclic;
341 	hrtime_t ts;
342 	int min;
343 
344 	if (itv == NULL)
345 		return (0);
346 
347 	if (iskaddr) {
348 		bcopy(itv, &aitv, sizeof (aitv));
349 	} else {
350 		ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
351 		if (copyin(itv, &aitv, sizeof (aitv)))
352 			return (EFAULT);
353 	}
354 
355 	if (which == ITIMER_REALPROF) {
356 		min = MAX((int)(cyclic_getres() / (NANOSEC / MICROSEC)),
357 		    itimer_realprof_minimum);
358 	} else {
359 		min = usec_per_tick;
360 	}
361 
362 	if (itimerfix(&aitv.it_value, min) ||
363 	    (itimerfix(&aitv.it_interval, min) && timerisset(&aitv.it_value)))
364 		return (EINVAL);
365 
366 	mutex_enter(&p->p_lock);
367 	switch (which) {
368 	case ITIMER_REAL:
369 		/*
370 		 * The SITBUSY flag prevents conflicts with multiple
371 		 * threads attempting to perform setitimer(ITIMER_REAL)
372 		 * at the same time, even when we drop p->p_lock below.
373 		 * Any blocked thread returns successfully because the
374 		 * effect is the same as if it got here first, finished,
375 		 * and the other thread then came through and destroyed
376 		 * what it did.  We are just protecting the system from
377 		 * malfunctioning due to the race condition.
378 		 */
379 		if (p->p_flag & SITBUSY) {
380 			mutex_exit(&p->p_lock);
381 			return (0);
382 		}
383 		p->p_flag |= SITBUSY;
384 		while ((tmp_id = p->p_itimerid) != 0) {
385 			/*
386 			 * Avoid deadlock in callout_delete (called from
387 			 * untimeout) which may go to sleep (while holding
388 			 * p_lock). Drop p_lock and re-acquire it after
389 			 * untimeout returns. Need to clear p_itimerid
390 			 * while holding p_lock.
391 			 */
392 			p->p_itimerid = 0;
393 			mutex_exit(&p->p_lock);
394 			(void) untimeout(tmp_id);
395 			mutex_enter(&p->p_lock);
396 		}
397 		if (timerisset(&aitv.it_value)) {
398 			uniqtime(&now);
399 			timevaladd(&aitv.it_value, &now);
400 			p->p_itimerid = realtime_timeout(realitexpire,
401 			    p, hzto(&aitv.it_value));
402 		}
403 		p->p_realitimer = aitv;
404 		p->p_flag &= ~SITBUSY;
405 		break;
406 
407 	case ITIMER_REALPROF:
408 		cyclic = p->p_rprof_cyclic;
409 		p->p_rprof_cyclic = CYCLIC_NONE;
410 
411 		mutex_exit(&p->p_lock);
412 
413 		/*
414 		 * We're now going to acquire cpu_lock, remove the old cyclic
415 		 * if necessary, and add our new cyclic.
416 		 */
417 		mutex_enter(&cpu_lock);
418 
419 		if (cyclic != CYCLIC_NONE)
420 			cyclic_remove(cyclic);
421 
422 		if (!timerisset(&aitv.it_value)) {
423 			/*
424 			 * If we were passed a value of 0, we're done.
425 			 */
426 			mutex_exit(&cpu_lock);
427 			return (0);
428 		}
429 
430 		hdlr.cyh_func = realprofexpire;
431 		hdlr.cyh_arg = p;
432 		hdlr.cyh_level = CY_LOW_LEVEL;
433 
434 		when.cyt_when = (ts = gethrtime() + tv2hrt(&aitv.it_value));
435 		when.cyt_interval = tv2hrt(&aitv.it_interval);
436 
437 		if (when.cyt_interval == 0) {
438 			/*
439 			 * Using the same logic as for CLOCK_HIGHRES timers, we
440 			 * set the interval to be INT64_MAX - when.cyt_when to
441 			 * effect a one-shot; see the comment in clock_highres.c
442 			 * for more details on why this works.
443 			 */
444 			when.cyt_interval = INT64_MAX - when.cyt_when;
445 		}
446 
447 		cyclic = cyclic_add(&hdlr, &when);
448 
449 		mutex_exit(&cpu_lock);
450 
451 		/*
452 		 * We have now successfully added the cyclic.  Reacquire
453 		 * p_lock, and see if anyone has snuck in.
454 		 */
455 		mutex_enter(&p->p_lock);
456 
457 		if (p->p_rprof_cyclic != CYCLIC_NONE) {
458 			/*
459 			 * We're racing with another thread establishing an
460 			 * ITIMER_REALPROF interval timer.  We'll let the other
461 			 * thread win (this is a race at the application level,
462 			 * so letting the other thread win is acceptable).
463 			 */
464 			mutex_exit(&p->p_lock);
465 			mutex_enter(&cpu_lock);
466 			cyclic_remove(cyclic);
467 			mutex_exit(&cpu_lock);
468 
469 			return (0);
470 		}
471 
472 		/*
473 		 * Success.  Set our tracking variables in the proc structure,
474 		 * cancel any outstanding ITIMER_PROF, and allocate the
475 		 * per-thread SIGPROF buffers, if possible.
476 		 */
477 		hrt2tv(ts, &aitv.it_value);
478 		p->p_rprof_timer = aitv;
479 		p->p_rprof_cyclic = cyclic;
480 
481 		t = p->p_tlist;
482 		do {
483 			struct itimerval *itvp;
484 
485 			itvp = &ttolwp(t)->lwp_timer[ITIMER_PROF];
486 			timerclear(&itvp->it_interval);
487 			timerclear(&itvp->it_value);
488 
489 			if (t->t_rprof != NULL)
490 				continue;
491 
492 			t->t_rprof =
493 			    kmem_zalloc(sizeof (struct rprof), KM_NOSLEEP);
494 			aston(t);
495 		} while ((t = t->t_forw) != p->p_tlist);
496 
497 		break;
498 
499 	case ITIMER_VIRTUAL:
500 		ttolwp(curthread)->lwp_timer[ITIMER_VIRTUAL] = aitv;
501 		break;
502 
503 	case ITIMER_PROF:
504 		if (p->p_rprof_cyclic != CYCLIC_NONE) {
505 			/*
506 			 * Silently ignore ITIMER_PROF if ITIMER_REALPROF
507 			 * is in effect.
508 			 */
509 			break;
510 		}
511 
512 		ttolwp(curthread)->lwp_timer[ITIMER_PROF] = aitv;
513 		break;
514 
515 	default:
516 		mutex_exit(&p->p_lock);
517 		return (EINVAL);
518 	}
519 	mutex_exit(&p->p_lock);
520 	return (0);
521 }
522 
523 /*
524  * Delete the ITIMER_REALPROF interval timer.
525  * Called only from exec_args() when exec occurs.
526  * The other ITIMER_* interval timers are specified
527  * to be inherited across exec(), so leave them alone.
528  */
529 void
delete_itimer_realprof(void)530 delete_itimer_realprof(void)
531 {
532 	kthread_t *t = curthread;
533 	struct proc *p = ttoproc(t);
534 	klwp_t *lwp = ttolwp(t);
535 	cyclic_id_t cyclic;
536 
537 	mutex_enter(&p->p_lock);
538 
539 	/* we are performing execve(); assert we are single-threaded */
540 	ASSERT(t == p->p_tlist && t == t->t_forw);
541 
542 	if ((cyclic = p->p_rprof_cyclic) == CYCLIC_NONE) {
543 		mutex_exit(&p->p_lock);
544 	} else {
545 		p->p_rprof_cyclic = CYCLIC_NONE;
546 		/*
547 		 * Delete any current instance of SIGPROF.
548 		 */
549 		if (lwp->lwp_cursig == SIGPROF) {
550 			lwp->lwp_cursig = 0;
551 			lwp->lwp_extsig = 0;
552 			if (lwp->lwp_curinfo) {
553 				siginfofree(lwp->lwp_curinfo);
554 				lwp->lwp_curinfo = NULL;
555 			}
556 		}
557 		/*
558 		 * Delete any pending instances of SIGPROF.
559 		 */
560 		sigdelset(&p->p_sig, SIGPROF);
561 		sigdelset(&p->p_extsig, SIGPROF);
562 		sigdelq(p, NULL, SIGPROF);
563 		sigdelset(&t->t_sig, SIGPROF);
564 		sigdelset(&t->t_extsig, SIGPROF);
565 		sigdelq(p, t, SIGPROF);
566 
567 		mutex_exit(&p->p_lock);
568 
569 		/*
570 		 * Remove the ITIMER_REALPROF cyclic.
571 		 */
572 		mutex_enter(&cpu_lock);
573 		cyclic_remove(cyclic);
574 		mutex_exit(&cpu_lock);
575 	}
576 }
577 
578 /*
579  * Real interval timer expired:
580  * send process whose timer expired an alarm signal.
581  * If time is not set up to reload, then just return.
582  * Else compute next time timer should go off which is > current time.
583  * This is where delay in processing this timeout causes multiple
584  * SIGALRM calls to be compressed into one.
585  */
586 static void
realitexpire(void * arg)587 realitexpire(void *arg)
588 {
589 	struct proc *p = arg;
590 	struct timeval *valp = &p->p_realitimer.it_value;
591 	struct timeval *intervalp = &p->p_realitimer.it_interval;
592 #if !defined(_LP64)
593 	clock_t	ticks;
594 #endif
595 
596 	mutex_enter(&p->p_lock);
597 #if !defined(_LP64)
598 	if ((ticks = hzto(valp)) > 1) {
599 		/*
600 		 * If we are executing before we were meant to, it must be
601 		 * because of an overflow in a prior hzto() calculation.
602 		 * In this case, we want to go to sleep for the recalculated
603 		 * number of ticks. For the special meaning of the value "1"
604 		 * see comment in timespectohz().
605 		 */
606 		p->p_itimerid = realtime_timeout(realitexpire, p, ticks);
607 		mutex_exit(&p->p_lock);
608 		return;
609 	}
610 #endif
611 	sigtoproc(p, NULL, SIGALRM);
612 	if (!timerisset(intervalp)) {
613 		timerclear(valp);
614 		p->p_itimerid = 0;
615 	} else {
616 		/* advance timer value past current time */
617 		timeval_advance(valp, intervalp);
618 		p->p_itimerid = realtime_timeout(realitexpire, p, hzto(valp));
619 	}
620 	mutex_exit(&p->p_lock);
621 }
622 
623 /*
624  * Real time profiling interval timer expired:
625  * Increment microstate counters for each lwp in the process
626  * and ensure that running lwps are kicked into the kernel.
627  * If time is not set up to reload, then just return.
628  * Else compute next time timer should go off which is > current time,
629  * as above.
630  */
631 static void
realprofexpire(void * arg)632 realprofexpire(void *arg)
633 {
634 	struct proc *p = arg;
635 	kthread_t *t;
636 
637 	mutex_enter(&p->p_lock);
638 	if (p->p_rprof_cyclic == CYCLIC_NONE ||
639 	    (t = p->p_tlist) == NULL) {
640 		mutex_exit(&p->p_lock);
641 		return;
642 	}
643 	do {
644 		int mstate;
645 
646 		/*
647 		 * Attempt to allocate the SIGPROF buffer, but don't sleep.
648 		 */
649 		if (t->t_rprof == NULL)
650 			t->t_rprof = kmem_zalloc(sizeof (struct rprof),
651 			    KM_NOSLEEP);
652 		if (t->t_rprof == NULL)
653 			continue;
654 
655 		thread_lock(t);
656 		switch (t->t_state) {
657 		case TS_SLEEP:
658 			/*
659 			 * Don't touch the lwp is it is swapped out.
660 			 */
661 			if (!(t->t_schedflag & TS_LOAD)) {
662 				mstate = LMS_SLEEP;
663 				break;
664 			}
665 			switch (mstate = ttolwp(t)->lwp_mstate.ms_prev) {
666 			case LMS_TFAULT:
667 			case LMS_DFAULT:
668 			case LMS_KFAULT:
669 			case LMS_USER_LOCK:
670 				break;
671 			default:
672 				mstate = LMS_SLEEP;
673 				break;
674 			}
675 			break;
676 		case TS_RUN:
677 		case TS_WAIT:
678 			mstate = LMS_WAIT_CPU;
679 			break;
680 		case TS_ONPROC:
681 			switch (mstate = t->t_mstate) {
682 			case LMS_USER:
683 			case LMS_SYSTEM:
684 			case LMS_TRAP:
685 				break;
686 			default:
687 				mstate = LMS_SYSTEM;
688 				break;
689 			}
690 			break;
691 		default:
692 			mstate = t->t_mstate;
693 			break;
694 		}
695 		t->t_rprof->rp_anystate = 1;
696 		t->t_rprof->rp_state[mstate]++;
697 		aston(t);
698 		/*
699 		 * force the thread into the kernel
700 		 * if it is not already there.
701 		 */
702 		if (t->t_state == TS_ONPROC && t->t_cpu != CPU)
703 			poke_cpu(t->t_cpu->cpu_id);
704 		thread_unlock(t);
705 	} while ((t = t->t_forw) != p->p_tlist);
706 
707 	mutex_exit(&p->p_lock);
708 }
709 
710 /*
711  * Advances timer value past the current time of day.  See the detailed
712  * comment for this logic in realitsexpire(), above.
713  */
714 static void
timeval_advance(struct timeval * valp,struct timeval * intervalp)715 timeval_advance(struct timeval *valp, struct timeval *intervalp)
716 {
717 	int cnt2nth;
718 	struct timeval interval2nth;
719 
720 	for (;;) {
721 		interval2nth = *intervalp;
722 		for (cnt2nth = 0; ; cnt2nth++) {
723 			timevaladd(valp, &interval2nth);
724 			/*CSTYLED*/
725 			if (TVTSCMP(valp, &hrestime, >))
726 				break;
727 			timevaladd(&interval2nth, &interval2nth);
728 		}
729 		if (cnt2nth == 0)
730 			break;
731 		timevalsub(valp, &interval2nth);
732 	}
733 }
734 
735 /*
736  * Check that a proposed value to load into the .it_value or .it_interval
737  * part of an interval timer is acceptable, and set it to at least a
738  * specified minimal value.
739  */
740 int
itimerfix(struct timeval * tv,int minimum)741 itimerfix(struct timeval *tv, int minimum)
742 {
743 	if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
744 	    tv->tv_usec < 0 || tv->tv_usec >= MICROSEC)
745 		return (EINVAL);
746 	if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < minimum)
747 		tv->tv_usec = minimum;
748 	return (0);
749 }
750 
751 /*
752  * Same as itimerfix, except a) it takes a timespec instead of a timeval and
753  * b) it doesn't truncate based on timeout granularity; consumers of this
754  * interface (e.g. timer_settime()) depend on the passed timespec not being
755  * modified implicitly.
756  */
757 int
itimerspecfix(timespec_t * tv)758 itimerspecfix(timespec_t *tv)
759 {
760 	if (tv->tv_sec < 0 || tv->tv_nsec < 0 || tv->tv_nsec >= NANOSEC)
761 		return (EINVAL);
762 	return (0);
763 }
764 
765 /*
766  * Decrement an interval timer by a specified number
767  * of microseconds, which must be less than a second,
768  * i.e. < 1000000.  If the timer expires, then reload
769  * it.  In this case, carry over (usec - old value) to
770  * reducint the value reloaded into the timer so that
771  * the timer does not drift.  This routine assumes
772  * that it is called in a context where the timers
773  * on which it is operating cannot change in value.
774  */
775 int
itimerdecr(struct itimerval * itp,int usec)776 itimerdecr(struct itimerval *itp, int usec)
777 {
778 	if (itp->it_value.tv_usec < usec) {
779 		if (itp->it_value.tv_sec == 0) {
780 			/* expired, and already in next interval */
781 			usec -= itp->it_value.tv_usec;
782 			goto expire;
783 		}
784 		itp->it_value.tv_usec += MICROSEC;
785 		itp->it_value.tv_sec--;
786 	}
787 	itp->it_value.tv_usec -= usec;
788 	usec = 0;
789 	if (timerisset(&itp->it_value))
790 		return (1);
791 	/* expired, exactly at end of interval */
792 expire:
793 	if (timerisset(&itp->it_interval)) {
794 		itp->it_value = itp->it_interval;
795 		itp->it_value.tv_usec -= usec;
796 		if (itp->it_value.tv_usec < 0) {
797 			itp->it_value.tv_usec += MICROSEC;
798 			itp->it_value.tv_sec--;
799 		}
800 	} else
801 		itp->it_value.tv_usec = 0;		/* sec is already 0 */
802 	return (0);
803 }
804 
/*
 * Add and subtract routines for timevals.
 * N.B.: the subtract routine doesn't deal with
 * results which are before the beginning,
 * it just gets very confused in this case.
 * Caveat emptor.
 */

/*
 * t1 += t2, with the result normalized by timevalfix().  t1 and t2 may
 * point at the same timeval, which doubles it.
 */
void
timevaladd(struct timeval *t1, struct timeval *t2)
{
	t1->tv_usec = t1->tv_usec + t2->tv_usec;
	t1->tv_sec = t1->tv_sec + t2->tv_sec;
	timevalfix(t1);
}
819 
/*
 * t1 -= t2, with the result normalized by timevalfix().  Results that
 * would fall before the beginning of time are not handled.
 */
void
timevalsub(struct timeval *t1, struct timeval *t2)
{
	t1->tv_usec = t1->tv_usec - t2->tv_usec;
	t1->tv_sec = t1->tv_sec - t2->tv_sec;
	timevalfix(t1);
}
827 
828 void
timevalfix(struct timeval * t1)829 timevalfix(struct timeval *t1)
830 {
831 	if (t1->tv_usec < 0) {
832 		t1->tv_sec--;
833 		t1->tv_usec += MICROSEC;
834 	}
835 	if (t1->tv_usec >= MICROSEC) {
836 		t1->tv_sec++;
837 		t1->tv_usec -= MICROSEC;
838 	}
839 }
840 
841 /*
842  * Same as the routines above. These routines take a timespec instead
843  * of a timeval.
844  */
845 void
timespecadd(timespec_t * t1,timespec_t * t2)846 timespecadd(timespec_t *t1, timespec_t *t2)
847 {
848 	t1->tv_sec += t2->tv_sec;
849 	t1->tv_nsec += t2->tv_nsec;
850 	timespecfix(t1);
851 }
852 
853 void
timespecsub(timespec_t * t1,timespec_t * t2)854 timespecsub(timespec_t *t1, timespec_t *t2)
855 {
856 	t1->tv_sec -= t2->tv_sec;
857 	t1->tv_nsec -= t2->tv_nsec;
858 	timespecfix(t1);
859 }
860 
861 void
timespecfix(timespec_t * t1)862 timespecfix(timespec_t *t1)
863 {
864 	if (t1->tv_nsec < 0) {
865 		t1->tv_sec--;
866 		t1->tv_nsec += NANOSEC;
867 	} else {
868 		if (t1->tv_nsec >= NANOSEC) {
869 			t1->tv_sec++;
870 			t1->tv_nsec -= NANOSEC;
871 		}
872 	}
873 }
874 
875 /*
876  * Compute number of hz until specified time.
877  * Used to compute third argument to timeout() from an absolute time.
878  */
879 clock_t
hzto(struct timeval * tv)880 hzto(struct timeval *tv)
881 {
882 	timespec_t ts, now;
883 
884 	ts.tv_sec = tv->tv_sec;
885 	ts.tv_nsec = tv->tv_usec * 1000;
886 	gethrestime_lasttick(&now);
887 
888 	return (timespectohz(&ts, now));
889 }
890 
891 /*
892  * Compute number of hz until specified time for a given timespec value.
893  * Used to compute third argument to timeout() from an absolute time.
894  */
895 clock_t
timespectohz(timespec_t * tv,timespec_t now)896 timespectohz(timespec_t *tv, timespec_t now)
897 {
898 	clock_t	ticks;
899 	time_t	sec;
900 	int	nsec;
901 
902 	/*
903 	 * Compute number of ticks we will see between now and
904 	 * the target time; returns "1" if the destination time
905 	 * is before the next tick, so we always get some delay,
906 	 * and returns LONG_MAX ticks if we would overflow.
907 	 */
908 	sec = tv->tv_sec - now.tv_sec;
909 	nsec = tv->tv_nsec - now.tv_nsec + nsec_per_tick - 1;
910 
911 	if (nsec < 0) {
912 		sec--;
913 		nsec += NANOSEC;
914 	} else if (nsec >= NANOSEC) {
915 		sec++;
916 		nsec -= NANOSEC;
917 	}
918 
919 	ticks = NSEC_TO_TICK(nsec);
920 
921 	/*
922 	 * Compute ticks, accounting for negative and overflow as above.
923 	 * Overflow protection kicks in at about 70 weeks for hz=50
924 	 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
925 	 * kernel :-)
926 	 */
927 	if (sec < 0 || (sec == 0 && ticks < 1))
928 		ticks = 1;			/* protect vs nonpositive */
929 	else if (sec > (LONG_MAX - ticks) / hz)
930 		ticks = LONG_MAX;		/* protect vs overflow */
931 	else
932 		ticks += sec * hz;		/* common case */
933 
934 	return (ticks);
935 }
936 
937 /*
938  * Compute number of hz with the timespec tv specified.
939  * The return type must be 64 bit integer.
940  */
941 int64_t
timespectohz64(timespec_t * tv)942 timespectohz64(timespec_t *tv)
943 {
944 	int64_t ticks;
945 	int64_t sec;
946 	int64_t nsec;
947 
948 	sec = tv->tv_sec;
949 	nsec = tv->tv_nsec + nsec_per_tick - 1;
950 
951 	if (nsec < 0) {
952 		sec--;
953 		nsec += NANOSEC;
954 	} else if (nsec >= NANOSEC) {
955 		sec++;
956 		nsec -= NANOSEC;
957 	}
958 
959 	ticks = NSEC_TO_TICK(nsec);
960 
961 	/*
962 	 * Compute ticks, accounting for negative and overflow as above.
963 	 * Overflow protection kicks in at about 70 weeks for hz=50
964 	 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
965 	 * kernel
966 	 */
967 	if (sec < 0 || (sec == 0 && ticks < 1))
968 		ticks = 1;			/* protect vs nonpositive */
969 	else if (sec > (((~0ULL) >> 1) - ticks) / hz)
970 		ticks = (~0ULL) >> 1;		/* protect vs overflow */
971 	else
972 		ticks += sec * hz;		/* common case */
973 
974 	return (ticks);
975 }
976 
977 /*
978  * hrt2ts(): convert from hrtime_t to timestruc_t.
979  *
980  * All this routine really does is:
981  *
982  *	tsp->sec  = hrt / NANOSEC;
983  *	tsp->nsec = hrt % NANOSEC;
984  *
985  * The black magic below avoids doing a 64-bit by 32-bit integer divide,
986  * which is quite expensive.  There's actually much more going on here than
987  * it might first appear -- don't try this at home.
988  *
989  * For the adventuresome, here's an explanation of how it works.
990  *
991  * Multiplication by a fixed constant is easy -- you just do the appropriate
992  * shifts and adds.  For example, to multiply by 10, we observe that
993  *
994  *	x * 10	= x * (8 + 2)
995  *		= (x * 8) + (x * 2)
996  *		= (x << 3) + (x << 1).
997  *
998  * In general, you can read the algorithm right off the bits: the number 10
999  * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3).
1000  *
1001  * Sometimes you can do better.  For example, 15 is 1111 binary, so the normal
1002  * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3).
1003  * But, it's cheaper if you capitalize on the fact that you have a run of ones:
1004  * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0).  [You would never
1005  * actually perform the operation << 0, since it's a no-op; I'm just writing
1006  * it that way for clarity.]
1007  *
1008  * The other way you can win is if you get lucky with the prime factorization
1009  * of your constant.  The number 1,000,000,000, which we have to multiply
1010  * by below, is a good example.  One billion is 111011100110101100101000000000
1011  * in binary.  If you apply the bit-grouping trick, it doesn't buy you very
1012  * much, because it's only a win for groups of three or more equal bits:
1013  *
1014  * 111011100110101100101000000000 = 1000000000000000000000000000000
1015  *				  -  000100011001010011011000000000
1016  *
1017  * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS,
1018  * we have reduced this to 10 shift/add pairs (20 operations) on the RHS.
1019  * This is better, but not great.
1020  *
1021  * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125,
1022  * and multiply by each factor.  Multiplication by 125 is particularly easy,
1023  * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four
1024  * operations.  So, to multiply by 1,000,000,000, we perform three multipli-
1025  * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations.
1026  * This is the algorithm we actually use in both hrt2ts() and ts2hrt().
1027  *
1028  * Division is harder; there is no equivalent of the simple shift-add algorithm
1029  * we used for multiplication.  However, we can convert the division problem
1030  * into a multiplication problem by pre-computing the binary representation
1031  * of the reciprocal of the divisor.  For the case of interest, we have
1032  *
1033  *	1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30,
1034  *
1035  * to 32 bits of precision.  (The notation B-30 means "* 2^-30", just like
1036  * E-18 means "* 10^-18".)
1037  *
1038  * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit
1039  * integer 10001001011100000101111101000001, then normalize (shift) the
1040  * result.  This constant has several large bits runs, so the multiply
1041  * is relatively cheap:
1042  *
1043  *	10001001011100000101111101000001 = 10001001100000000110000001000001
1044  *					 - 00000000000100000000000100000000
1045  *
1046  * Again, you can just read the algorithm right off the bits:
1047  *
1048  *			sec = hrt;
1049  *			sec += (hrt << 6);
1050  *			sec -= (hrt << 8);
1051  *			sec += (hrt << 13);
1052  *			sec += (hrt << 14);
1053  *			sec -= (hrt << 20);
1054  *			sec += (hrt << 23);
1055  *			sec += (hrt << 24);
1056  *			sec += (hrt << 27);
1057  *			sec += (hrt << 31);
1058  *			sec >>= (32 + 30);
1059  *
1060  * Voila!  The only problem is, since hrt is 64 bits, we need to use 96-bit
1061  * arithmetic to perform this calculation.  That's a waste, because ultimately
1062  * we only need the highest 32 bits of the result.
1063  *
1064  * The first thing we do is to realize that we don't need to use all of hrt
1065  * in the calculation.  The lowest 30 bits can contribute at most 1 to the
1066  * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later.
1067  * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t.
1068  * Thus, the only bits of hrt that matter for division are bits 30..61.
1069  * These 32 bits are just the lower-order word of (hrt >> 30).  This brings
1070  * us down from 96-bit math to 64-bit math, and our algorithm becomes:
1071  *
1072  *			tmp = (uint32_t) (hrt >> 30);
1073  *			sec = tmp;
1074  *			sec += (tmp << 6);
1075  *			sec -= (tmp << 8);
1076  *			sec += (tmp << 13);
1077  *			sec += (tmp << 14);
1078  *			sec -= (tmp << 20);
1079  *			sec += (tmp << 23);
1080  *			sec += (tmp << 24);
1081  *			sec += (tmp << 27);
1082  *			sec += (tmp << 31);
1083  *			sec >>= 32;
1084  *
1085  * Next, we're going to reduce this 64-bit computation to a 32-bit
1086  * computation.  We begin by rewriting the above algorithm to use relative
1087  * shifts instead of absolute shifts.  That is, instead of computing
1088  * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally:
1089  * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc:
1090  *
1091  *			tmp = (uint32_t) (hrt >> 30);
1092  *			sec = tmp;
1093  *			tmp <<= 6; sec += tmp;
1094  *			tmp <<= 2; sec -= tmp;
1095  *			tmp <<= 5; sec += tmp;
1096  *			tmp <<= 1; sec += tmp;
1097  *			tmp <<= 6; sec -= tmp;
1098  *			tmp <<= 3; sec += tmp;
1099  *			tmp <<= 1; sec += tmp;
1100  *			tmp <<= 3; sec += tmp;
1101  *			tmp <<= 4; sec += tmp;
1102  *			sec >>= 32;
1103  *
1104  * Now for the final step.  Instead of throwing away the low 32 bits at
1105  * the end, we can throw them away as we go, only keeping the high 32 bits
1106  * of the product at each step.  So, for example, where we now have
1107  *
1108  *			tmp <<= 6; sec = sec + tmp;
1109  * we will instead have
1110  *			tmp <<= 6; sec = (sec + tmp) >> 6;
1111  * which is equivalent to
1112  *			sec = (sec >> 6) + tmp;
1113  *
1114  * The final shift ("sec >>= 32") goes away.
1115  *
1116  * All we're really doing here is long multiplication, just like we learned in
1117  * grade school, except that at each step, we only look at the leftmost 32
1118  * columns.  The cumulative error is, at most, the sum of all the bits we
1119  * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32.
1120  * Thus, the final result ("sec") is correct to +/- 1.
1121  *
1122  * It turns out to be important to keep "sec" positive at each step, because
1123  * we don't want to have to explicitly extend the sign bit.  Therefore,
1124  * starting with the last line of code above, each line that would have read
1125  * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and
1126  * the operators (+ or -) in all previous lines must be toggled accordingly.
1127  * Thus, we end up with:
1128  *
1129  *			tmp = (uint32_t) (hrt >> 30);
1130  *			sec = tmp + (sec >> 6);
1131  *			sec = tmp - (tmp >> 2);
1132  *			sec = tmp - (sec >> 5);
1133  *			sec = tmp + (sec >> 1);
1134  *			sec = tmp - (sec >> 6);
1135  *			sec = tmp - (sec >> 3);
1136  *			sec = tmp + (sec >> 1);
1137  *			sec = tmp + (sec >> 3);
1138  *			sec = tmp + (sec >> 4);
1139  *
1140  * This yields a value for sec that is accurate to +1/-1, so we have two
1141  * cases to deal with.  The mysterious-looking "+ 7" in the code below biases
1142  * the rounding toward zero, so that sec is always less than or equal to
1143  * the correct value.  With this modified code, sec is accurate to +0/-2, with
1144  * the -2 case being very rare in practice.  With this change, we only have to
1145  * deal with one case (sec too small) in the cleanup code.
1146  *
1147  * The other modification we make is to delete the second line above
1148  * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is
1149  * set, and the cleanup code can handle that rare case.  This reduces the
1150  * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases.
1151  *
1152  * Finally, we compute nsec = hrt - (sec * 1,000,000,000).  nsec will always
1153  * be positive (since sec is never too large), and will at most be equal to
1154  * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt.
1155  * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can
1156  * safely assume that nsec fits in 32 bits.  Consequently, when we compute
1157  * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit
1158  * arithmetic and let the high-order bits fall off the end.
1159  *
1160  * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop:
1161  *
1162  *			while (nsec >= NANOSEC) {
1163  *				nsec -= NANOSEC;
1164  *				sec++;
1165  *			}
1166  *
1167  * is guaranteed to complete in at most 4 iterations.  In practice, the loop
1168  * completes in 0 or 1 iteration over 95% of the time.
1169  *
1170  * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about
1171  * 35 usec for software division -- about 20 times faster.
1172  */
1173 void
hrt2ts(hrtime_t hrt,timestruc_t * tsp)1174 hrt2ts(hrtime_t hrt, timestruc_t *tsp)
1175 {
1176 #if defined(__amd64)
1177 	/*
1178 	 * The cleverness explained above is unecessary on x86_64 CPUs where
1179 	 * modern compilers are able to optimize down to faster operations.
1180 	 */
1181 	tsp->tv_sec = hrt / NANOSEC;
1182 	tsp->tv_nsec = hrt % NANOSEC;
1183 #else
1184 	uint32_t sec, nsec, tmp;
1185 
1186 	tmp = (uint32_t)(hrt >> 30);
1187 	sec = tmp - (tmp >> 2);
1188 	sec = tmp - (sec >> 5);
1189 	sec = tmp + (sec >> 1);
1190 	sec = tmp - (sec >> 6) + 7;
1191 	sec = tmp - (sec >> 3);
1192 	sec = tmp + (sec >> 1);
1193 	sec = tmp + (sec >> 3);
1194 	sec = tmp + (sec >> 4);
1195 	tmp = (sec << 7) - sec - sec - sec;
1196 	tmp = (tmp << 7) - tmp - tmp - tmp;
1197 	tmp = (tmp << 7) - tmp - tmp - tmp;
1198 	nsec = (uint32_t)hrt - (tmp << 9);
1199 	while (nsec >= NANOSEC) {
1200 		nsec -= NANOSEC;
1201 		sec++;
1202 	}
1203 	tsp->tv_sec = (time_t)sec;
1204 	tsp->tv_nsec = nsec;
1205 #endif /* defined(__amd64) */
1206 }
1207 
1208 /*
1209  * Convert from timestruc_t to hrtime_t.
1210  */
1211 hrtime_t
ts2hrt(const timestruc_t * tsp)1212 ts2hrt(const timestruc_t *tsp)
1213 {
1214 #if defined(__x86)
1215 	/*
1216 	 * On modern x86 CPUs, the simple version is faster.
1217 	 */
1218 	return ((tsp->tv_sec * NANOSEC) + tsp->tv_nsec);
1219 #else
1220 	/*
1221 	 * The code below is equivalent to:
1222 	 *
1223 	 *	hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec;
1224 	 *
1225 	 * but requires no integer multiply.
1226 	 */
1227 	hrtime_t hrt;
1228 
1229 	hrt = tsp->tv_sec;
1230 	hrt = (hrt << 7) - hrt - hrt - hrt;
1231 	hrt = (hrt << 7) - hrt - hrt - hrt;
1232 	hrt = (hrt << 7) - hrt - hrt - hrt;
1233 	hrt = (hrt << 9) + tsp->tv_nsec;
1234 	return (hrt);
1235 #endif /* defined(__x86) */
1236 }
1237 
1238 /*
1239  * For the various 32-bit "compatibility" paths in the system.
1240  */
1241 void
hrt2ts32(hrtime_t hrt,timestruc32_t * ts32p)1242 hrt2ts32(hrtime_t hrt, timestruc32_t *ts32p)
1243 {
1244 	timestruc_t ts;
1245 
1246 	hrt2ts(hrt, &ts);
1247 	TIMESPEC_TO_TIMESPEC32(ts32p, &ts);
1248 }
1249 
1250 /*
1251  * If this ever becomes performance critical (ha!), we can borrow the
1252  * code from ts2hrt(), above, to multiply tv_sec by 1,000,000 and the
1253  * straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec by
1254  * 1,000.  For now, we'll opt for readability (besides, the compiler does
1255  * a passable job of optimizing constant multiplication into shifts and adds).
1256  */
1257 hrtime_t
tv2hrt(struct timeval * tvp)1258 tv2hrt(struct timeval *tvp)
1259 {
1260 	return ((hrtime_t)tvp->tv_sec * NANOSEC +
1261 	    (hrtime_t)tvp->tv_usec * (NANOSEC / MICROSEC));
1262 }
1263 
1264 void
hrt2tv(hrtime_t hrt,struct timeval * tvp)1265 hrt2tv(hrtime_t hrt, struct timeval *tvp)
1266 {
1267 #if defined(__amd64)
1268 	/*
1269 	 * Like hrt2ts, the simple version is faster on x86_64.
1270 	 */
1271 	tvp->tv_sec = hrt / NANOSEC;
1272 	tvp->tv_usec = (hrt % NANOSEC) / (NANOSEC / MICROSEC);
1273 #else
1274 	uint32_t sec, nsec, tmp;
1275 	uint32_t q, r, t;
1276 
1277 	tmp = (uint32_t)(hrt >> 30);
1278 	sec = tmp - (tmp >> 2);
1279 	sec = tmp - (sec >> 5);
1280 	sec = tmp + (sec >> 1);
1281 	sec = tmp - (sec >> 6) + 7;
1282 	sec = tmp - (sec >> 3);
1283 	sec = tmp + (sec >> 1);
1284 	sec = tmp + (sec >> 3);
1285 	sec = tmp + (sec >> 4);
1286 	tmp = (sec << 7) - sec - sec - sec;
1287 	tmp = (tmp << 7) - tmp - tmp - tmp;
1288 	tmp = (tmp << 7) - tmp - tmp - tmp;
1289 	nsec = (uint32_t)hrt - (tmp << 9);
1290 	while (nsec >= NANOSEC) {
1291 		nsec -= NANOSEC;
1292 		sec++;
1293 	}
1294 	tvp->tv_sec = (time_t)sec;
1295 	/*
1296 	 * this routine is very similar to hr2ts, but requires microseconds
1297 	 * instead of nanoseconds, so an interger divide by 1000 routine
1298 	 * completes the conversion
1299 	 */
1300 	t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12);
1301 	q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14);
1302 	q = q >> 9;
1303 	r = nsec - q*1000;
1304 	tvp->tv_usec = q + ((r + 24) >> 10);
1305 #endif /* defined(__amd64) */
1306 }
1307 
1308 int
nanosleep(timespec_t * rqtp,timespec_t * rmtp)1309 nanosleep(timespec_t *rqtp, timespec_t *rmtp)
1310 {
1311 	timespec_t rqtime;
1312 	timespec_t rmtime;
1313 	timespec_t now;
1314 	int timecheck;
1315 	int ret = 1;
1316 	model_t datamodel = get_udatamodel();
1317 
1318 	timecheck = timechanged;
1319 	gethrestime(&now);
1320 
1321 	if (datamodel == DATAMODEL_NATIVE) {
1322 		if (copyin(rqtp, &rqtime, sizeof (rqtime)))
1323 			return (set_errno(EFAULT));
1324 	} else {
1325 		timespec32_t rqtime32;
1326 
1327 		if (copyin(rqtp, &rqtime32, sizeof (rqtime32)))
1328 			return (set_errno(EFAULT));
1329 		TIMESPEC32_TO_TIMESPEC(&rqtime, &rqtime32);
1330 	}
1331 
1332 	if (rqtime.tv_sec < 0 || rqtime.tv_nsec < 0 ||
1333 	    rqtime.tv_nsec >= NANOSEC)
1334 		return (set_errno(EINVAL));
1335 
1336 	if (timerspecisset(&rqtime)) {
1337 		timespecadd(&rqtime, &now);
1338 		mutex_enter(&curthread->t_delay_lock);
1339 		while ((ret = cv_waituntil_sig(&curthread->t_delay_cv,
1340 		    &curthread->t_delay_lock, &rqtime, timecheck)) > 0)
1341 			continue;
1342 		mutex_exit(&curthread->t_delay_lock);
1343 	}
1344 
1345 	if (rmtp) {
1346 		/*
1347 		 * If cv_waituntil_sig() returned due to a signal, and
1348 		 * there is time remaining, then set the time remaining.
1349 		 * Else set time remaining to zero
1350 		 */
1351 		rmtime.tv_sec = rmtime.tv_nsec = 0;
1352 		if (ret == 0) {
1353 			timespec_t delta = rqtime;
1354 
1355 			gethrestime(&now);
1356 			timespecsub(&delta, &now);
1357 			if (delta.tv_sec > 0 || (delta.tv_sec == 0 &&
1358 			    delta.tv_nsec > 0))
1359 				rmtime = delta;
1360 		}
1361 
1362 		if (datamodel == DATAMODEL_NATIVE) {
1363 			if (copyout(&rmtime, rmtp, sizeof (rmtime)))
1364 				return (set_errno(EFAULT));
1365 		} else {
1366 			timespec32_t rmtime32;
1367 
1368 			TIMESPEC_TO_TIMESPEC32(&rmtime32, &rmtime);
1369 			if (copyout(&rmtime32, rmtp, sizeof (rmtime32)))
1370 				return (set_errno(EFAULT));
1371 		}
1372 	}
1373 
1374 	if (ret == 0)
1375 		return (set_errno(EINTR));
1376 	return (0);
1377 }
1378 
1379 /*
1380  * Routines to convert standard UNIX time (seconds since Jan 1, 1970)
1381  * into year/month/day/hour/minute/second format, and back again.
1382  * Note: these routines require tod_lock held to protect cached state.
1383  */
1384 static int days_thru_month[64] = {
1385 	0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0,
1386 	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
1387 	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
1388 	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
1389 };
1390 
1391 todinfo_t saved_tod;
1392 int saved_utc = -60;
1393 
1394 todinfo_t
utc_to_tod(time_t utc)1395 utc_to_tod(time_t utc)
1396 {
1397 	long dse, day, month, year;
1398 	todinfo_t tod;
1399 
1400 	ASSERT(MUTEX_HELD(&tod_lock));
1401 
1402 	/*
1403 	 * Note that tod_set_prev() assumes utc will be set to zero in
1404 	 * the case of it being negative.  Consequently, any change made
1405 	 * to this behavior would have to be reflected in that function
1406 	 * as well.
1407 	 */
1408 	if (utc < 0)			/* should never happen */
1409 		utc = 0;
1410 
1411 	saved_tod.tod_sec += utc - saved_utc;
1412 	saved_utc = utc;
1413 	if (saved_tod.tod_sec >= 0 && saved_tod.tod_sec < 60)
1414 		return (saved_tod);	/* only the seconds changed */
1415 
1416 	dse = utc / 86400;		/* days since epoch */
1417 
1418 	tod.tod_sec = utc % 60;
1419 	tod.tod_min = (utc % 3600) / 60;
1420 	tod.tod_hour = (utc % 86400) / 3600;
1421 	tod.tod_dow = (dse + 4) % 7 + 1;	/* epoch was a Thursday */
1422 
1423 	year = dse / 365 + 72;	/* first guess -- always a bit too large */
1424 	do {
1425 		year--;
1426 		day = dse - 365 * (year - 70) - ((year - 69) >> 2);
1427 	} while (day < 0);
1428 
1429 	month = ((year & 3) << 4) + 1;
1430 	while (day >= days_thru_month[month + 1])
1431 		month++;
1432 
1433 	tod.tod_day = day - days_thru_month[month] + 1;
1434 	tod.tod_month = month & 15;
1435 	tod.tod_year = year;
1436 
1437 	saved_tod = tod;
1438 	return (tod);
1439 }
1440 
1441 time_t
tod_to_utc(todinfo_t tod)1442 tod_to_utc(todinfo_t tod)
1443 {
1444 	time_t utc;
1445 	int year = tod.tod_year;
1446 	int month = tod.tod_month + ((year & 3) << 4);
1447 #ifdef DEBUG
1448 	/* only warn once, not each time called */
1449 	static int year_warn = 1;
1450 	static int month_warn = 1;
1451 	static int day_warn = 1;
1452 	static int hour_warn = 1;
1453 	static int min_warn = 1;
1454 	static int sec_warn = 1;
1455 	int days_diff = days_thru_month[month + 1] - days_thru_month[month];
1456 #endif
1457 
1458 	ASSERT(MUTEX_HELD(&tod_lock));
1459 
1460 #ifdef DEBUG
1461 	if (year_warn && (year < 70 || year > 8029)) {
1462 		cmn_err(CE_WARN,
1463 		    "The hardware real-time clock appears to have the "
1464 		    "wrong years value %d -- time needs to be reset\n",
1465 		    year);
1466 		year_warn = 0;
1467 	}
1468 
1469 	if (month_warn && (tod.tod_month < 1 || tod.tod_month > 12)) {
1470 		cmn_err(CE_WARN,
1471 		    "The hardware real-time clock appears to have the "
1472 		    "wrong months value %d -- time needs to be reset\n",
1473 		    tod.tod_month);
1474 		month_warn = 0;
1475 	}
1476 
1477 	if (day_warn && (tod.tod_day < 1 || tod.tod_day > days_diff)) {
1478 		cmn_err(CE_WARN,
1479 		    "The hardware real-time clock appears to have the "
1480 		    "wrong days value %d -- time needs to be reset\n",
1481 		    tod.tod_day);
1482 		day_warn = 0;
1483 	}
1484 
1485 	if (hour_warn && (tod.tod_hour < 0 || tod.tod_hour > 23)) {
1486 		cmn_err(CE_WARN,
1487 		    "The hardware real-time clock appears to have the "
1488 		    "wrong hours value %d -- time needs to be reset\n",
1489 		    tod.tod_hour);
1490 		hour_warn = 0;
1491 	}
1492 
1493 	if (min_warn && (tod.tod_min < 0 || tod.tod_min > 59)) {
1494 		cmn_err(CE_WARN,
1495 		    "The hardware real-time clock appears to have the "
1496 		    "wrong minutes value %d -- time needs to be reset\n",
1497 		    tod.tod_min);
1498 		min_warn = 0;
1499 	}
1500 
1501 	if (sec_warn && (tod.tod_sec < 0 || tod.tod_sec > 59)) {
1502 		cmn_err(CE_WARN,
1503 		    "The hardware real-time clock appears to have the "
1504 		    "wrong seconds value %d -- time needs to be reset\n",
1505 		    tod.tod_sec);
1506 		sec_warn = 0;
1507 	}
1508 #endif
1509 
1510 	utc = (year - 70);		/* next 3 lines: utc = 365y + y/4 */
1511 	utc += (utc << 3) + (utc << 6);
1512 	utc += (utc << 2) + ((year - 69) >> 2);
1513 	utc += days_thru_month[month] + tod.tod_day - 1;
1514 	utc = (utc << 3) + (utc << 4) + tod.tod_hour;	/* 24 * day + hour */
1515 	utc = (utc << 6) - (utc << 2) + tod.tod_min;	/* 60 * hour + min */
1516 	utc = (utc << 6) - (utc << 2) + tod.tod_sec;	/* 60 * min + sec */
1517 
1518 	return (utc);
1519 }
1520