/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/time.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>


static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct rwlock uihashtbl_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
		    struct timeval *up, struct timeval *sp);
static int	donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);

/*
 * Resource controls and accounting.
 */
#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
int
getpriority(td, uap)
	struct thread *td;
	register struct getpriority_args *uap;
{
	struct proc *p;
	struct pgrp *pg;
	int error, low;

	error = 0;
	low = PRIO_MAX + 1;
	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = td->td_proc->p_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0)
				low = p->p_nice;
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p_cansee(td, p) == 0) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			/* Do not bother to check PRS_NEW processes */
			if (p->p_state == PRS_NEW)
				continue;
			PROC_LOCK(p);
			if (p_cansee(td, p) == 0 &&
			    p->p_ucred->cr_uid == uap->who) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (low == PRIO_MAX + 1 && error == 0)
		error = ESRCH;
	td->td_retval[0] = low;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
int
setpriority(td, uap)
	struct thread *td;
	struct setpriority_args *uap;
{
	struct proc *curp, *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	curp = td->td_proc;
	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			PROC_LOCK(curp);
			error = donice(td, curp, uap->prio);
			PROC_UNLOCK(curp);
		} else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			error = p_cansee(td, p);
			if (error == 0)
				error = donice(td, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = curp->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_ucred->cr_uid == uap->who &&
			    p_cansee(td, p) == 0) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (found == 0 && error == 0)
		error = ESRCH;
	return (error);
}

/*
 * Set "nice" for a (whole) process.
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
		return (EACCES);
	sched_nice(p, n);
	return (0);
}

/*
 * Set realtime priority for LWP.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_thread_args {
	int		function;
	lwpid_t		lwpid;
	struct rtprio	*rtp;
};
#endif
int
rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
{
	struct proc *p;
	struct rtprio rtp;
	struct thread *td1;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	/*
	 * Though lwpid is unique, only current process is supported
	 * since there is no efficient way to look up a LWP yet.
	 */
	p = td->td_proc;
	PROC_LOCK(p);

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			pri_to_rtp(td1, &rtp);
		else
			error = ESRCH;
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		/*
		 * Realtime priority has to be restricted for reasons which
		 * should be obvious.  However, for idle priority, there is
		 * a potential for system deadlock if an idleprio process
		 * gains a lock on a resource that other processes need
		 * (and the idleprio process can't run due to a CPU-bound
		 * normal process).  Fix me!  XXX
		 */
#if 0
		if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
		if (rtp.type != RTP_PRIO_NORMAL) {
#endif
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}

		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			error = rtp_to_pri(&rtp, td1);
		else
			error = ESRCH;
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Set realtime priority.
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif
int
rtprio(td, uap)
	struct thread *td;		/* curthread */
	register struct rtprio_args *uap;
{
	struct proc *p;
	struct thread *tdp;
	struct rtprio rtp;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	if (uap->pid == 0) {
		p = td->td_proc;
		PROC_LOCK(p);
	} else {
		p = pfind(uap->pid);
		if (p == NULL)
			return (ESRCH);
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		/*
		 * Return OUR priority if no pid specified,
		 * or if one is, report the highest priority
		 * in the process.  There isn't much more you can do as
		 * there is only room to return a single priority.
		 * Note: specifying our own pid is not the same
		 * as leaving it zero.
		 */
		if (uap->pid == 0) {
			pri_to_rtp(td, &rtp);
		} else {
			struct rtprio rtp2;

			rtp.type = RTP_PRIO_IDLE;
			rtp.prio = RTP_PRIO_MAX;
			FOREACH_THREAD_IN_PROC(p, tdp) {
				pri_to_rtp(tdp, &rtp2);
				if (rtp2.type < rtp.type ||
				    (rtp2.type == rtp.type &&
				    rtp2.prio < rtp.prio)) {
					rtp.type = rtp2.type;
					rtp.prio = rtp2.prio;
				}
			}
		}
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		/*
		 * Realtime priority has to be restricted for reasons which
		 * should be obvious.  However, for idle priority, there is
		 * a potential for system deadlock if an idleprio process
		 * gains a lock on a resource that other processes need
		 * (and the idleprio process can't run due to a CPU-bound
		 * normal process).  Fix me!  XXX
		 */
#if 0
		if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
		if (rtp.type != RTP_PRIO_NORMAL) {
#endif
			error = priv_check(td, PRIV_SCHED_RTPRIO);
			if (error)
				break;
		}

		/*
		 * If we are setting our own priority, set just our
		 * thread but if we are doing another process,
		 * do all the threads on that process.  If we
		 * specify our own pid we do the latter.
		 */
		if (uap->pid == 0) {
			error = rtp_to_pri(&rtp, td);
		} else {
			FOREACH_THREAD_IN_PROC(p, td) {
				if ((error = rtp_to_pri(&rtp, td)) != 0)
					break;
			}
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

int
rtp_to_pri(struct rtprio *rtp, struct thread *td)
{
	u_char	newpri;
	u_char	oldpri;

	if (rtp->prio > RTP_PRIO_MAX)
		return (EINVAL);
	thread_lock(td);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		newpri = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		newpri = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		newpri = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		thread_unlock(td);
		return (EINVAL);
	}
	sched_class(td, rtp->type);	/* XXX fix */
	oldpri = td->td_user_pri;
	sched_user_prio(td, newpri);
	if (curthread == td)
		sched_prio(curthread, td->td_user_pri); /* XXX dubious */
	if (TD_ON_UPILOCK(td) && oldpri != newpri) {
		thread_unlock(td);
		umtx_pi_adjust(td, oldpri);
	} else
		thread_unlock(td);
	return (0);
}

void
pri_to_rtp(struct thread *td, struct rtprio *rtp)
{

	thread_lock(td);
	switch (PRI_BASE(td->td_pri_class)) {
	case PRI_REALTIME:
		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = td->td_pri_class;
	thread_unlock(td);
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
osetrlimit(td, uap)
	struct thread *td;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
int
ogetrlimit(td, uap)
	struct thread *td;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit rl;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rl);
	PROC_UNLOCK(p);

	/*
	 * XXX would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values.  Most 64->32 and 32->16 conversions, including not
	 * unimportant ones of uids are even more broken than what we
	 * do here (they blindly truncate).  We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
	return (error);
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
int
setrlimit(td, uap)
	struct thread *td;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
		return (error);
	error = kern_setrlimit(td, uap->which, &alim);
	return (error);
}

static void
lim_cb(void *arg)
{
	struct rlimit rlim;
	struct thread *td;
	struct proc *p;

	p = arg;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Check if the process exceeds its cpu resource allocation.  If
	 * it reaches the max, arrange to kill the process in ast().
	 */
	if (p->p_cpulimit == RLIM_INFINITY)
		return;
	PROC_SLOCK(p);
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		ruxagg(&p->p_rux, td);
		thread_unlock(td);
	}
	PROC_SUNLOCK(p);
	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
		lim_rlimit(p, RLIMIT_CPU, &rlim);
		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
			killproc(p, "exceeded maximum CPU limit");
		} else {
			if (p->p_cpulimit < rlim.rlim_max)
				p->p_cpulimit += 5;
			psignal(p, SIGXCPU);
		}
	}
	callout_reset(&p->p_limco, hz, lim_cb, p);
}

int
kern_setrlimit(td, which, limp)
	struct thread *td;
	u_int which;
	struct rlimit *limp;
{
	struct plimit *newlim, *oldlim;
	struct proc *p;
	register struct rlimit *alimp;
	struct rlimit oldssiz;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz.rlim_cur = 0;
	p = td->td_proc;
	newlim = lim_alloc();
	PROC_LOCK(p);
	oldlim = p->p_limit;
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
			PROC_UNLOCK(p);
			lim_free(newlim);
			return (error);
		}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur != RLIM_INFINITY &&
		    p->p_cpulimit == RLIM_INFINITY)
			callout_reset(&p->p_limco, hz, lim_cb, p);
		p->p_cpulimit = limp->rlim_cur;
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		oldssiz = *alimp;
		if (td->td_proc->p_sysent->sv_fixlimit != NULL)
			td->td_proc->p_sysent->sv_fixlimit(&oldssiz,
			    RLIMIT_STACK);
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	if (td->td_proc->p_sysent->sv_fixlimit != NULL)
		td->td_proc->p_sysent->sv_fixlimit(limp, which);
	*alimp = *limp;
	p->p_limit = newlim;
	PROC_UNLOCK(p);
	lim_free(oldlim);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != oldssiz.rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > oldssiz.rlim_cur) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz.rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = oldssiz.rlim_cur - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack -
				    oldssiz.rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void)vm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, FALSE);
		}
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/* ARGSUSED */
int
getrlimit(td, uap)
	struct thread *td;
	register struct __getrlimit_args *uap;
{
	struct rlimit rlim;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rlim);
	PROC_UNLOCK(p);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
	return (error);
}

/*
 * Transform the running time and tick information for children of proc p
 * into user and system time usage.
 */
void
calccru(p, up, sp)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	calcru1(p, &p->p_crux, up, sp);
}

/*
 * Transform the running time and tick information in proc p into user
 * and system time usage.  If appropriate, include the current time slice
 * on this CPU.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
	struct thread *td;
	uint64_t u;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * If we are getting stats for the current process, then add in the
	 * stats that this thread has accumulated in its current time slice.
	 * We reset the thread and CPU state as if we had performed a context
	 * switch right here.
	 */
	td = curthread;
	if (td->td_proc == p) {
		u = cpu_ticks();
		p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
		PCPU_SET(switchtime, u);
	}
	/* Make sure the per-thread stats are current. */
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_incruntime == 0)
			continue;
		thread_lock(td);
		ruxagg(&p->p_rux, td);
		thread_unlock(td);
	}
	calcru1(p, &p->p_rux, up, sp);
}

static void
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
    struct timeval *sp)
{
	/* {user, system, interrupt, total} {ticks, usec}: */
	u_int64_t ut, uu, st, su, it, tt, tu;

	ut = ruxp->rux_uticks;
	st = ruxp->rux_sticks;
	it = ruxp->rux_iticks;
	tt = ut + st + it;
	if (tt == 0) {
		/* Avoid divide by zero */
		st = 1;
		tt = 1;
	}
	tu = cputick2usec(ruxp->rux_runtime);
	if ((int64_t)tu < 0) {
		/* XXX: this should be an assert /phk */
		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);
		tu = ruxp->rux_tu;
	}

	if (tu >= ruxp->rux_tu) {
		/*
		 * The normal case, time increased.
		 * Enforce monotonicity of bucketed numbers.
		 */
		uu = (tu * ut) / tt;
		if (uu < ruxp->rux_uu)
			uu = ruxp->rux_uu;
		su = (tu * st) / tt;
		if (su < ruxp->rux_su)
			su = ruxp->rux_su;
	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
		/*
		 * When we calibrate the cputicker, it is not uncommon to
		 * see the presumably fixed frequency increase slightly over
		 * time as a result of thermal stabilization and NTP
		 * discipline (of the reference clock).  We therefore ignore
		 * a bit of backwards slop because we expect to catch up
		 * shortly.  We use a 3 microsecond limit to catch low
		 * counts and a 1% limit for high counts.
		 */
		uu = ruxp->rux_uu;
		su = ruxp->rux_su;
		tu = ruxp->rux_tu;
	} else { /* tu < ruxp->rux_tu */
		/*
		 * What happened here was likely that a laptop, which ran at
		 * a reduced clock frequency at boot, kicked into high gear.
		 * The wisdom of spamming this message in that case is
		 * dubious, but it might also be indicative of something
		 * serious, so let's keep it and hope laptops can be made
		 * more truthful about their CPU speed via ACPI.
		 */
		printf("calcru: runtime went backwards from %ju usec "
		    "to %ju usec for pid %d (%s)\n",
		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
		    p->p_pid, p->p_comm);
		uu = (tu * ut) / tt;
		su = (tu * st) / tt;
	}

	ruxp->rux_uu = uu;
	ruxp->rux_su = su;
	ruxp->rux_tu = tu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
int
getrusage(td, uap)
	register struct thread *td;
	register struct getrusage_args *uap;
{
	struct rusage ru;
	int error;

	error = kern_getrusage(td, uap->who, &ru);
	if (error == 0)
		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
	return (error);
}

int
kern_getrusage(td, who, rup)
	struct thread *td;
	int who;
	struct rusage *rup;
{
	struct proc *p;
	int error;

	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	switch (who) {
	case RUSAGE_SELF:
		rufetchcalc(p, rup, &rup->ru_utime,
		    &rup->ru_stime);
		break;

	case RUSAGE_CHILDREN:
		*rup = p->p_stats->p_cru;
		calccru(p, &rup->ru_utime, &rup->ru_stime);
		break;

	default:
		error = EINVAL;
	}
	PROC_UNLOCK(p);
	return (error);
}

void
rucollect(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

void
ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
    struct rusage_ext *rux2)
{

	rux->rux_runtime += rux2->rux_runtime;
	rux->rux_uticks += rux2->rux_uticks;
	rux->rux_sticks += rux2->rux_sticks;
	rux->rux_iticks += rux2->rux_iticks;
	rux->rux_uu += rux2->rux_uu;
	rux->rux_su += rux2->rux_su;
	rux->rux_tu += rux2->rux_tu;
	rucollect(ru, ru2);
}

/*
 * Aggregate tick counts into the proc's rusage_ext.
 */
void
ruxagg(struct rusage_ext *rux, struct thread *td)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED);
	rux->rux_runtime += td->td_incruntime;
	rux->rux_uticks += td->td_uticks;
	rux->rux_sticks += td->td_sticks;
	rux->rux_iticks += td->td_iticks;
	td->td_incruntime = 0;
	td->td_uticks = 0;
	td->td_iticks = 0;
	td->td_sticks = 0;
}

/*
 * Update the rusage_ext structure and fetch a valid aggregate rusage
 * for proc p if storage for one is supplied.
 */
void
rufetch(struct proc *p, struct rusage *ru)
{
	struct thread *td;

	PROC_SLOCK_ASSERT(p, MA_OWNED);

	*ru = p->p_ru;
	if (p->p_numthreads > 0) {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			ruxagg(&p->p_rux, td);
			thread_unlock(td);
			rucollect(ru, &td->td_ru);
		}
	}
}

/*
 * Atomically perform a rufetch and a calcru together.
 * Consumers can safely assume that calcru() is executed only once
 * rufetch() has completed.
 */
void
rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
    struct timeval *sp)
{

	PROC_SLOCK(p);
	rufetch(p, ru);
	calcru(p, up, sp);
	PROC_SUNLOCK(p);
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc()
{
	struct plimit *limp;

	limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	refcount_init(&limp->pl_refcnt, 1);
	return (limp);
}

struct plimit *
lim_hold(limp)
	struct plimit *limp;
{

	refcount_acquire(&limp->pl_refcnt);
	return (limp);
}

void
lim_fork(struct proc *p1, struct proc *p2)
{
	p2->p_limit = lim_hold(p1->p_limit);
	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
	if (p1->p_cpulimit != RLIM_INFINITY)
		callout_reset(&p2->p_limco, hz, lim_cb, p2);
}

void
lim_free(limp)
	struct plimit *limp;
{

	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
	if (refcount_release(&limp->pl_refcnt))
		free((void *)limp, M_PLIMIT);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(dst, src)
	struct plimit *dst, *src;
{

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
	if (p->p_sysent->sv_fixlimit != NULL)
		p->p_sysent->sv_fixlimit(rlp, which);
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	rw_init(&uihashtbl_lock, "uidinfo hash");
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_lock must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	rw_assert(&uihashtbl_lock, RA_LOCKED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct uidinfo *old_uip, *uip;

	rw_rlock(&uihashtbl_lock);
	uip = uilookup(uid);
	if (uip == NULL) {
		rw_runlock(&uihashtbl_lock);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		rw_wlock(&uihashtbl_lock);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			refcount_init(&uip->ui_ref, 0);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	rw_unlock(&uihashtbl_lock);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	refcount_acquire(&uip->ui_ref);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{
	int old;

	/* Prepare for optimal case. */
	old = uip->ui_ref;
	if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1))
		return;

	/* Prepare for suboptimal case. */
	rw_wlock(&uihashtbl_lock);
	if (refcount_release(&uip->ui_ref)) {
		LIST_REMOVE(uip, ui_hash);
		rw_wunlock(&uihashtbl_lock);
		if (uip->ui_sbsize != 0)
			printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
			    uip->ui_uid, uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		FREE(uip, M_UIDINFO);
		return;
	}
	/*
	 * Someone added a reference between atomic_cmpset_int() and
	 * rw_wlock(&uihashtbl_lock).
	 */
	rw_wunlock(&uihashtbl_lock);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	rlim_t	max;
{

	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && max != 0) {
		if (atomic_fetchadd_long(&uip->ui_proccnt, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_proccnt, (long)diff);
			return (0);
		}
	} else {
		atomic_add_long(&uip->ui_proccnt, (long)diff);
		if (uip->ui_proccnt < 0)
			printf("negative proccnt for uid = %d\n", uip->ui_uid);
	}
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_int	*hiwat;
	u_int	to;
	rlim_t	max;
{
	int diff;

	diff = to - *hiwat;
	if (diff > 0) {
		if (atomic_fetchadd_long(&uip->ui_sbsize, (long)diff) + diff > max) {
			atomic_subtract_long(&uip->ui_sbsize, (long)diff);
			return (0);
		}
	} else {
		atomic_add_long(&uip->ui_sbsize, (long)diff);
		if (uip->ui_sbsize < 0)
			printf("negative sbsize for uid = %d\n", uip->ui_uid);
	}
	*hiwat = to;
	return (1);
}