/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>


static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
		    struct timeval *up, struct timeval *sp);
static int	donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);

/*
 * Resource controls and accounting.
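 *
 * This file implements the getpriority(), setpriority(), rtprio(),
 * rtprio_thread(), getrlimit(), setrlimit() and getrusage() system
 * calls, the plimit structures holding per-process resource limits
 * (shared copy-on-write across fork()), and the per-uid uidinfo
 * accounting used to enforce limits such as the number of processes
 * and the socket buffer space a user may consume.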
 */

#ifndef _SYS_SYSPROTO_H_
struct getpriority_args {
	int	which;
	int	who;
};
#endif
/*
 * MPSAFE
 */
int
getpriority(td, uap)
	struct thread *td;
	register struct getpriority_args *uap;
{
	struct proc *p;
	struct pgrp *pg;
	int error, low;

	error = 0;
	low = PRIO_MAX + 1;
	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			low = td->td_proc->p_nice;
		else {
			p = pfind(uap->who);
			if (p == NULL)
				break;
			if (p_cansee(td, p) == 0)
				low = p->p_nice;
			PROC_UNLOCK(p);
		}
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			/* Do not bother to check PRS_NEW processes */
			if (p->p_state == PRS_NEW)
				continue;
			PROC_LOCK(p);
			if (!p_cansee(td, p) &&
			    p->p_ucred->cr_uid == uap->who) {
				if (p->p_nice < low)
					low = p->p_nice;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (low == PRIO_MAX + 1 && error == 0)
		error = ESRCH;
	td->td_retval[0] = low;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct setpriority_args {
	int	which;
	int	who;
	int	prio;
};
#endif
/*
 * MPSAFE
 */
int
setpriority(td, uap)
	struct thread *td;
	struct setpriority_args *uap;
{
	struct proc *curp, *p;
	struct pgrp *pg;
	int found = 0, error = 0;

	curp = td->td_proc;
	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0) {
			PROC_LOCK(curp);
			error = donice(td, curp, uap->prio);
			PROC_UNLOCK(curp);
		} else {
			p = pfind(uap->who);
			if (p == 0)
				break;
			if (p_cansee(td, p) == 0)
				error = donice(td, p, uap->prio);
			PROC_UNLOCK(p);
		}
		found++;
		break;

	case PRIO_PGRP:
		sx_slock(&proctree_lock);
		if (uap->who == 0) {
			pg = curp->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(uap->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (!p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_ucred->cr_uid == uap->who &&
			    !p_cansee(td, p)) {
				error = donice(td, p, uap->prio);
				found++;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (found == 0 && error == 0)
		error = ESRCH;
	return (error);
}

/*
 * Set "nice" for a (whole) process.
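 * The requested value is clamped to the [PRIO_MIN, PRIO_MAX] range;
 * lowering the nice value (i.e. raising priority) requires the
 * PRIV_SCHED_SETPRIORITY privilege.  Called with the process locked.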
 */
static int
donice(struct thread *td, struct proc *p, int n)
{
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((error = p_cansched(td, p)))
		return (error);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
		return (EACCES);
	mtx_lock_spin(&sched_lock);
	sched_nice(p, n);
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Set realtime priority for LWP.
 *
 * MPSAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_thread_args {
	int		function;
	lwpid_t		lwpid;
	struct rtprio	*rtp;
};
#endif

int
rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
{
	struct proc *curp;
	struct proc *p;
	struct rtprio rtp;
	struct thread *td1;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	curp = td->td_proc;
	/*
	 * Though lwpid is unique, only current process is supported
	 * since there is no efficient way to look up a LWP yet.
	 */
	p = curp;
	PROC_LOCK(p);

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		mtx_lock_spin(&sched_lock);
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			pri_to_rtp(td1, &rtp);
		else
			error = ESRCH;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		if (priv_check(td, PRIV_SCHED_RTPRIO) != 0) {
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious.  However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process).  Fix me! XXX
 */
#if 0
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
			if (rtp.type != RTP_PRIO_NORMAL) {
#endif
				error = EPERM;
				break;
			}
		}

		mtx_lock_spin(&sched_lock);
		if (uap->lwpid == 0 || uap->lwpid == td->td_tid)
			td1 = td;
		else
			td1 = thread_find(p, uap->lwpid);
		if (td1 != NULL)
			error = rtp_to_pri(&rtp, td1);
		else
			error = ESRCH;
		mtx_unlock_spin(&sched_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Set realtime priority.
 *
 * MPSAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
	int		function;
	pid_t		pid;
	struct rtprio	*rtp;
};
#endif

int
rtprio(td, uap)
	struct thread *td;		/* curthread */
	register struct rtprio_args *uap;
{
	struct proc *curp;
	struct proc *p;
	struct thread *tdp;
	struct rtprio rtp;
	int cierror, error;

	/* Perform copyin before acquiring locks if needed. */
	if (uap->function == RTP_SET)
		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	else
		cierror = 0;

	curp = td->td_proc;
	if (uap->pid == 0) {
		p = curp;
		PROC_LOCK(p);
	} else {
		p = pfind(uap->pid);
		if (p == NULL)
			return (ESRCH);
	}

	switch (uap->function) {
	case RTP_LOOKUP:
		if ((error = p_cansee(td, p)))
			break;
		mtx_lock_spin(&sched_lock);
		/*
		 * Return OUR priority if no pid specified,
		 * or if one is, report the highest priority
		 * in the process.  There isn't much more you can do as
		 * there is only room to return a single priority.
		 * XXXKSE: maybe need a new interface to report
		 * priorities of multiple system scope threads.
		 * Note: specifying our own pid is not the same
		 * as leaving it zero.
		 */
		if (uap->pid == 0) {
			pri_to_rtp(td, &rtp);
		} else {
			struct rtprio rtp2;

			rtp.type = RTP_PRIO_IDLE;
			rtp.prio = RTP_PRIO_MAX;
			FOREACH_THREAD_IN_PROC(p, tdp) {
				pri_to_rtp(tdp, &rtp2);
				if (rtp2.type < rtp.type ||
				    (rtp2.type == rtp.type &&
				    rtp2.prio < rtp.prio)) {
					rtp.type = rtp2.type;
					rtp.prio = rtp2.prio;
				}
			}
		}
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if ((error = p_cansched(td, p)) || (error = cierror))
			break;

		/* Disallow setting rtprio in most cases if not superuser. */
		if (priv_check(td, PRIV_SCHED_RTPRIO) != 0) {
			/* can't set someone else's */
			if (uap->pid) {
				error = EPERM;
				break;
			}
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious.  However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process).  Fix me! XXX
 */
#if 0
			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
#else
			if (rtp.type != RTP_PRIO_NORMAL) {
#endif
				error = EPERM;
				break;
			}
		}

		/*
		 * If we are setting our own priority, set just our
		 * thread but if we are doing another process,
		 * do all the threads on that process.  If we
		 * specify our own pid we do the latter.
		 */
		mtx_lock_spin(&sched_lock);
		if (uap->pid == 0) {
			error = rtp_to_pri(&rtp, td);
		} else {
			FOREACH_THREAD_IN_PROC(p, td) {
				if ((error = rtp_to_pri(&rtp, td)) != 0)
					break;
			}
		}
		mtx_unlock_spin(&sched_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	PROC_UNLOCK(p);
	return (error);
}

int
rtp_to_pri(struct rtprio *rtp, struct thread *td)
{
	u_char	newpri;

	mtx_assert(&sched_lock, MA_OWNED);
	if (rtp->prio > RTP_PRIO_MAX)
		return (EINVAL);
	switch (RTP_PRIO_BASE(rtp->type)) {
	case RTP_PRIO_REALTIME:
		newpri = PRI_MIN_REALTIME + rtp->prio;
		break;
	case RTP_PRIO_NORMAL:
		newpri = PRI_MIN_TIMESHARE + rtp->prio;
		break;
	case RTP_PRIO_IDLE:
		newpri = PRI_MIN_IDLE + rtp->prio;
		break;
	default:
		return (EINVAL);
	}
	sched_class(td, rtp->type);	/* XXX fix */
	sched_user_prio(td, newpri);
	if (curthread == td)
		sched_prio(curthread, td->td_user_pri); /* XXX dubious */
	return (0);
}

void
pri_to_rtp(struct thread *td, struct rtprio *rtp)
{

	mtx_assert(&sched_lock, MA_OWNED);
	switch (PRI_BASE(td->td_pri_class)) {
	case PRI_REALTIME:
		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
		break;
	case PRI_TIMESHARE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
		break;
	case PRI_IDLE:
		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
		break;
	default:
		break;
	}
	rtp->type = td->td_pri_class;
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/*
 * MPSAFE
 */
int
osetrlimit(td, uap)
	struct thread *td;
	register struct osetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit lim;
	int error;

	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
		return (error);
	lim.rlim_cur = olim.rlim_cur;
	lim.rlim_max = olim.rlim_max;
	error = kern_setrlimit(td, uap->which, &lim);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
	u_int	which;
	struct	orlimit *rlp;
};
#endif
/*
 * MPSAFE
 */
int
ogetrlimit(td, uap)
	struct thread *td;
	register struct ogetrlimit_args *uap;
{
	struct orlimit olim;
	struct rlimit rl;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rl);
	PROC_UNLOCK(p);

	/*
	 * XXX would be more correct to convert only RLIM_INFINITY to the
	 * old RLIM_INFINITY and fail with EOVERFLOW for other larger
	 * values.  Most 64->32 and 32->16 conversions, including not
	 * unimportant ones of uids are even more broken than what we
	 * do here (they blindly truncate).  We don't do this correctly
	 * here since we have little experience with EOVERFLOW yet.
	 * Elsewhere, getuid() can't fail...
	 */
	olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur;
	olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max;
	error = copyout(&olim, uap->rlp, sizeof(olim));
	return (error);
}
#endif /* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct __setrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/*
 * MPSAFE
 */
int
setrlimit(td, uap)
	struct thread *td;
	register struct __setrlimit_args *uap;
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
		return (error);
	error = kern_setrlimit(td, uap->which, &alim);
	return (error);
}

int
kern_setrlimit(td, which, limp)
	struct thread *td;
	u_int which;
	struct rlimit *limp;
{
	struct plimit *newlim, *oldlim;
	struct proc *p;
	register struct rlimit *alimp;
	rlim_t oldssiz;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	oldssiz = 0;
	p = td->td_proc;
	newlim = lim_alloc();
	PROC_LOCK(p);
	oldlim = p->p_limit;
	alimp = &oldlim->pl_rlimit[which];
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = priv_check_cred(td->td_ucred,
		    PRIV_PROC_SETRLIMIT, SUSER_ALLOWJAIL))) {
			PROC_UNLOCK(p);
			lim_free(newlim);
			return (error);
		}
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	lim_copy(newlim, oldlim);
	alimp = &newlim->pl_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		mtx_lock_spin(&sched_lock);
		p->p_cpulimit = limp->rlim_cur;
		mtx_unlock_spin(&sched_lock);
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		oldssiz = alimp->rlim_cur;
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	*alimp = *limp;
	p->p_limit = newlim;
	PROC_UNLOCK(p);
	lim_free(oldlim);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != oldssiz) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > oldssiz) {
				prot = p->p_sysent->sv_stackprot;
				size = limp->rlim_cur - oldssiz;
				addr = p->p_sysent->sv_usrstack -
				    limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = oldssiz - limp->rlim_cur;
				addr = p->p_sysent->sv_usrstack - oldssiz;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void)vm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, FALSE);
		}
	}

	/*
	 * The data size limit may need to be changed to a value
	 * that makes sense for the 32 bit binary.
	 */
	if (p->p_sysent->sv_fixlimits != NULL)
		p->p_sysent->sv_fixlimits(p);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct __getrlimit_args {
	u_int	which;
	struct	rlimit *rlp;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
getrlimit(td, uap)
	struct thread *td;
	register struct __getrlimit_args *uap;
{
	struct rlimit rlim;
	struct proc *p;
	int error;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	p = td->td_proc;
	PROC_LOCK(p);
	lim_rlimit(p, uap->which, &rlim);
	PROC_UNLOCK(p);
	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
	return (error);
}

/*
 * Transform the running time and tick information for children of proc p
 * into user and system time usage.
 */
void
calccru(p, up, sp)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	calcru1(p, &p->p_crux, up, sp);
}

/*
 * Transform the running time and tick information in proc p into user
 * and system time usage.  If appropriate, include the current time slice
 * on this CPU.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
	struct rusage_ext rux;
	struct thread *td;
	uint64_t u;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_NOTOWNED);
	mtx_lock_spin(&sched_lock);

	/*
	 * If we are getting stats for the current process, then add in the
	 * stats that this thread has accumulated in its current time slice.
	 * We reset the thread and CPU state as if we had performed a context
	 * switch right here.
	 */
	if (curthread->td_proc == p) {
		td = curthread;
		u = cpu_ticks();
		p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
		PCPU_SET(switchtime, u);
		p->p_rux.rux_uticks += td->td_uticks;
		td->td_uticks = 0;
		p->p_rux.rux_iticks += td->td_iticks;
		td->td_iticks = 0;
		p->p_rux.rux_sticks += td->td_sticks;
		td->td_sticks = 0;
	}
	/* Work on a copy of p_rux so we can let go of sched_lock */
	rux = p->p_rux;
	mtx_unlock_spin(&sched_lock);
	calcru1(p, &rux, up, sp);
	/* Update the result from the p_rux copy */
	p->p_rux.rux_uu = rux.rux_uu;
	p->p_rux.rux_su = rux.rux_su;
	p->p_rux.rux_tu = rux.rux_tu;
}

static void
calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
    struct timeval *sp)
{
	/* {user, system, interrupt, total} {ticks, usec}: */
	u_int64_t ut, uu, st, su, it, tt, tu;

	ut = ruxp->rux_uticks;
	st = ruxp->rux_sticks;
	it = ruxp->rux_iticks;
	tt = ut + st + it;
	if (tt == 0) {
		/* Avoid divide by zero */
		st = 1;
		tt = 1;
	}
	tu = cputick2usec(ruxp->rux_runtime);
	if ((int64_t)tu < 0) {
		/* XXX: this should be an assert /phk */
		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
		    (intmax_t)tu, p->p_pid, p->p_comm);
		tu = ruxp->rux_tu;
	}

	if (tu >= ruxp->rux_tu) {
		/*
		 * The normal case, time increased.
		 * Enforce monotonicity of bucketed numbers.
		 */
		uu = (tu * ut) / tt;
		if (uu < ruxp->rux_uu)
			uu = ruxp->rux_uu;
		su = (tu * st) / tt;
		if (su < ruxp->rux_su)
			su = ruxp->rux_su;
	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
		/*
		 * When we calibrate the cputicker, it is not uncommon to
		 * see the presumably fixed frequency increase slightly over
		 * time as a result of thermal stabilization and NTP
		 * discipline (of the reference clock).  We therefore ignore
		 * a bit of backwards slop because we expect to catch up
		 * shortly.  We use a 3 microsecond limit to catch low
		 * counts and a 1% limit for high counts.
		 */
		uu = ruxp->rux_uu;
		su = ruxp->rux_su;
		tu = ruxp->rux_tu;
	} else { /* tu < ruxp->rux_tu */
		/*
		 * What happened here was likely that a laptop, which ran at
		 * a reduced clock frequency at boot, kicked into high gear.
		 * The wisdom of spamming this message in that case is
		 * dubious, but it might also be indicative of something
		 * serious, so let's keep it and hope laptops can be made
		 * more truthful about their CPU speed via ACPI.
		 */
		printf("calcru: runtime went backwards from %ju usec "
		    "to %ju usec for pid %d (%s)\n",
		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
		    p->p_pid, p->p_comm);
		uu = (tu * ut) / tt;
		su = (tu * st) / tt;
	}

	ruxp->rux_uu = uu;
	ruxp->rux_su = su;
	ruxp->rux_tu = tu;

	up->tv_sec = uu / 1000000;
	up->tv_usec = uu % 1000000;
	sp->tv_sec = su / 1000000;
	sp->tv_usec = su % 1000000;
}

#ifndef _SYS_SYSPROTO_H_
struct getrusage_args {
	int	who;
	struct	rusage *rusage;
};
#endif
/*
 * MPSAFE
 */
int
getrusage(td, uap)
	register struct thread *td;
	register struct getrusage_args *uap;
{
	struct rusage ru;
	int error;

	error = kern_getrusage(td, uap->who, &ru);
	if (error == 0)
		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
	return (error);
}

int
kern_getrusage(td, who, rup)
	struct thread *td;
	int who;
	struct rusage *rup;
{
	struct proc *p;

	p = td->td_proc;
	PROC_LOCK(p);
	switch (who) {

	case RUSAGE_SELF:
		*rup = p->p_stats->p_ru;
		calcru(p, &rup->ru_utime, &rup->ru_stime);
		break;

	case RUSAGE_CHILDREN:
		*rup = p->p_stats->p_cru;
		calccru(p, &rup->ru_utime, &rup->ru_stime);
		break;

	default:
		PROC_UNLOCK(p);
		return (EINVAL);
	}
	PROC_UNLOCK(p);
	return (0);
}

void
ruadd(ru, rux, ru2, rux2)
	struct rusage *ru;
	struct rusage_ext *rux;
	struct rusage *ru2;
	struct rusage_ext *rux2;
{
	register long *ip, *ip2;
	register int i;

	rux->rux_runtime += rux2->rux_runtime;
	rux->rux_uticks += rux2->rux_uticks;
	rux->rux_sticks += rux2->rux_sticks;
	rux->rux_iticks += rux2->rux_iticks;
	rux->rux_uu += rux2->rux_uu;
	rux->rux_su += rux2->rux_su;
	rux->rux_tu += rux2->rux_tu;
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Allocate a new resource limits structure and initialize its
 * reference count and mutex pointer.
 */
struct plimit *
lim_alloc()
{
	struct plimit *limp;

	limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	refcount_init(&limp->pl_refcnt, 1);
	return (limp);
}

struct plimit *
lim_hold(limp)
	struct plimit *limp;
{

	refcount_acquire(&limp->pl_refcnt);
	return (limp);
}

void
lim_free(limp)
	struct plimit *limp;
{

	KASSERT(limp->pl_refcnt > 0, ("plimit refcnt underflow"));
	if (refcount_release(&limp->pl_refcnt))
		free((void *)limp, M_PLIMIT);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork.
 */
void
lim_copy(dst, src)
	struct plimit *dst, *src;
{

	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
}

/*
 * Return the hard limit for a particular system resource.  The
 * which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_max(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_max);
}

/*
 * Return the current (soft) limit for a particular system resource.
 * The which parameter specifies the index into the rlimit array.
 */
rlim_t
lim_cur(struct proc *p, int which)
{
	struct rlimit rl;

	lim_rlimit(p, which, &rl);
	return (rl.rlim_cur);
}

/*
 * Return a copy of the entire rlimit structure for the system limit
 * specified by 'which' in the rlimit structure pointed to by 'rlp'.
 */
void
lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(which >= 0 && which < RLIM_NLIMITS,
	    ("request for invalid resource limit"));
	*rlp = p->p_limit->pl_rlimit[which];
}

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
}

/*
 * Look up a uidinfo struct for the parameter uid.
 * uihashtbl_mtx must be locked.
 */
static struct uidinfo *
uilookup(uid)
	uid_t uid;
{
	struct uihashhead *uipp;
	struct uidinfo *uip;

	mtx_assert(&uihashtbl_mtx, MA_OWNED);
	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid)
			break;

	return (uip);
}

/*
 * Find or allocate a struct uidinfo for a particular uid.
 * Increase refcount on uidinfo struct returned.
 * uifree() should be called on a struct uidinfo when released.
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct uidinfo *old_uip, *uip;

	mtx_lock(&uihashtbl_mtx);
	uip = uilookup(uid);
	if (uip == NULL) {
		mtx_unlock(&uihashtbl_mtx);
		uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
		mtx_lock(&uihashtbl_mtx);
		/*
		 * There's a chance someone created our uidinfo while we
		 * were in malloc and not holding the lock, so we have to
		 * make sure we don't insert a duplicate uidinfo.
		 */
		if ((old_uip = uilookup(uid)) != NULL) {
			/* Someone else beat us to it. */
			free(uip, M_UIDINFO);
			uip = old_uip;
		} else {
			uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep);
			uip->ui_uid = uid;
			LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
		}
	}
	uihold(uip);
	mtx_unlock(&uihashtbl_mtx);
	return (uip);
}

/*
 * Place another refcount on a uidinfo struct.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	UIDINFO_LOCK(uip);
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
}

/*-
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order to try again.
 */
void
uifree(uip)
	struct uidinfo *uip;
{

	/* Prepare for optimal case. */
	UIDINFO_LOCK(uip);

	if (--uip->ui_ref != 0) {
		UIDINFO_UNLOCK(uip);
		return;
	}

	/* Prepare for suboptimal case. */
	uip->ui_ref++;
	UIDINFO_UNLOCK(uip);
	mtx_lock(&uihashtbl_mtx);
	UIDINFO_LOCK(uip);

	/*
	 * We must subtract one from the count again because we backed out
	 * our initial subtraction before dropping the lock.
	 * Since another thread may have added a reference after we dropped the
	 * initial lock we have to test for zero again.
	 */
	if (--uip->ui_ref == 0) {
		LIST_REMOVE(uip, ui_hash);
		mtx_unlock(&uihashtbl_mtx);
		if (uip->ui_sbsize != 0)
			printf("freeing uidinfo: uid = %d, sbsize = %jd\n",
			    uip->ui_uid, (intmax_t)uip->ui_sbsize);
		if (uip->ui_proccnt != 0)
			printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
			    uip->ui_uid, uip->ui_proccnt);
		UIDINFO_UNLOCK(uip);
		FREE(uip, M_UIDINFO);
		return;
	}

	mtx_unlock(&uihashtbl_mtx);
	UIDINFO_UNLOCK(uip);
}

/*
 * Change the count associated with number of processes
 * a given user is using.  When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(uip, diff, max)
	struct	uidinfo	*uip;
	int	diff;
	int	max;
{

	UIDINFO_LOCK(uip);
	/* Don't allow them to exceed max, but allow subtraction. */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	UIDINFO_UNLOCK(uip);
	return (1);
}

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct	uidinfo	*uip;
	u_int	*hiwat;
	u_int	to;
	rlim_t	max;
{
	rlim_t new;

	UIDINFO_LOCK(uip);
	new = uip->ui_sbsize + to - *hiwat;
	/* Don't allow them to exceed max, but allow subtraction. */
	if (to > *hiwat && new > max) {
		UIDINFO_UNLOCK(uip);
		return (0);
	}
	uip->ui_sbsize = new;
	UIDINFO_UNLOCK(uip);
	*hiwat = to;
	if (new < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	return (1);
}