1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/atomic.h> 30 #include <sys/cmn_err.h> 31 #include <sys/id_space.h> 32 #include <sys/kmem.h> 33 #include <sys/log.h> 34 #include <sys/modctl.h> 35 #include <sys/modhash.h> 36 #include <sys/mutex.h> 37 #include <sys/proc.h> 38 #include <sys/procset.h> 39 #include <sys/project.h> 40 #include <sys/resource.h> 41 #include <sys/rctl.h> 42 #include <sys/siginfo.h> 43 #include <sys/strlog.h> 44 #include <sys/systm.h> 45 #include <sys/task.h> 46 #include <sys/types.h> 47 #include <sys/policy.h> 48 #include <sys/zone.h> 49 50 /* 51 * Resource controls (rctls) 52 * 53 * The rctl subsystem provides a mechanism for kernel components to 54 * register their individual resource controls with the system as a whole, 55 * such that those controls can subscribe to specific actions while being 56 * associated with the various process-model entities provided by the kernel: 57 * the process, the task, the project, and the zone. 
(In principle, only 58 * minor modifications would be required to connect the resource control 59 * functionality to non-process-model entities associated with the system.) 60 * 61 * Subsystems register their rctls via rctl_register(). Subsystems 62 * also wishing to provide additional limits on a given rctl can modify 63 * them once they have the rctl handle. Each subsystem should store the 64 * handle to their rctl for direct access. 65 * 66 * A primary dictionary, rctl_dict, contains a hash of id to the default 67 * control definition for each controlled resource-entity pair on the system. 68 * A secondary dictionary, rctl_dict_by_name, contains a hash of name to 69 * resource control handles. The resource control handles are distributed by 70 * the rctl_ids ID space. The handles are private and not to be 71 * advertised to userland; all userland interactions are via the rctl 72 * names. 73 * 74 * Entities inherit their rctls from their predecessor. Since projects have 75 * no ancestor, they inherit their rctls from the rctl dict for project 76 * rctls. It is expected that project controls will be set to their 77 * appropriate values shortly after project creation, presumably from a 78 * policy source such as the project database. 79 * 80 * Data structures 81 * The rctl_set_t attached to each of the process model entities is a simple 82 * hash table keyed on the rctl handle assigned at registration. 
The entries 83 * in the hash table are rctl_t's, whose relationship with the active control 84 * values on that resource and with the global state of the resource we 85 * illustrate below: 86 * 87 * rctl_dict[key] --> rctl_dict_entry 88 * ^ 89 * | 90 * +--+---+ 91 * rctl_set[key] ---> | rctl | --> value <-> value <-> system value --> NULL 92 * +--+---+ ^ 93 * | | 94 * +------- cursor ------+ 95 * 96 * That is, the rctl contains a back pointer to the global resource control 97 * state for this resource, which is also available in the rctl_dict hash 98 * table mentioned earlier. The rctl contains two pointers to resource 99 * control values: one, values, indicates the entire sequence of control 100 * values; the other, cursor, indicates the currently active control 101 * value--the next value to be enforced. The value list itself is an open, 102 * doubly-linked list, the last non-NULL member of which is the system value 103 * for that resource (being the theoretical/conventional maximum allowable 104 * value for the resource on this OS instance). 105 * 106 * Ops Vector 107 * Subsystems publishing rctls need not provide instances of all of the 108 * functions specified by the ops vector. In particular, if general 109 * rctl_*() entry points are not being called, certain functions can be 110 * omitted. These align as follows: 111 * 112 * rctl_set() 113 * You may wish to provide a set callback if locking circumstances prevent 114 * it or if the performance cost of requesting the enforced value from the 115 * resource control is prohibitively expensive. For instance, the currently 116 * enforced file size limit is stored on the process in the p_fsz_ctl to 117 * maintain read()/write() performance. 118 * 119 * rctl_test() 120 * You must provide a test callback if you are using the rctl_test() 121 * interface. An action callback is optional. 122 * 123 * rctl_action() 124 * You may wish to provide an action callback. 
125 * 126 * Registration 127 * New resource controls can be added to a running instance by loaded modules 128 * via registration. (The current implementation does not support unloadable 129 * modules; this functionality can be added if needed, via an 130 * activation/deactivation interface involving the manipulation of the 131 * ops vector for the resource control(s) needing to support unloading.) 132 * 133 * Control value ordering 134 * Because the rctl_val chain on each rctl must be navigable in a 135 * deterministic way, we have to define an ordering on the rctl_val_t's. The 136 * defined order is (flags & [maximal], value, flags & [deny-action], 137 * privilege). 138 * 139 * Locking 140 * rctl_dict_lock must be acquired prior to rctl_lists_lock. Since 141 * rctl_dict_lock or rctl_lists_lock can be called at the enforcement point 142 * of any subsystem, holding subsystem locks, it is at all times inappropriate 143 * to call kmem_alloc(., KM_SLEEP) while holding either of these locks. 144 * Traversing any of the various resource control entity lists requires 145 * holding rctl_lists_lock. 146 * 147 * Each individual resource control set associated with an entity must have 148 * its rcs_lock held for the duration of any operations that would add 149 * resource controls or control values to the set. 150 * 151 * The locking subsequence of interest is: p_lock, rctl_dict_lock, 152 * rctl_lists_lock, entity->rcs_lock. 
153 */ 154 155 id_t max_rctl_hndl = 32768; 156 int rctl_dict_size = 64; 157 int rctl_set_size = 8; 158 kmutex_t rctl_dict_lock; 159 mod_hash_t *rctl_dict; 160 mod_hash_t *rctl_dict_by_name; 161 id_space_t *rctl_ids; 162 kmem_cache_t *rctl_cache; /* kmem cache for rctl structures */ 163 kmem_cache_t *rctl_val_cache; /* kmem cache for rctl values */ 164 165 kmutex_t rctl_lists_lock; 166 rctl_dict_entry_t *rctl_lists[RC_MAX_ENTITY + 1]; 167 168 /* 169 * Default resource control operations and ops vector 170 * To be used if the particular rcontrol has no specific actions defined, or 171 * if the subsystem providing the control is quiescing (in preparation for 172 * unloading, presumably.) 173 * 174 * Resource controls with callbacks should fill the unused operations with the 175 * appropriate default impotent callback. 176 */ 177 /*ARGSUSED*/ 178 void 179 rcop_no_action(struct rctl *r, struct proc *p, rctl_entity_p_t *e) 180 { 181 } 182 183 /*ARGSUSED*/ 184 rctl_qty_t 185 rcop_no_usage(struct rctl *r, struct proc *p) 186 { 187 return (0); 188 } 189 190 /*ARGSUSED*/ 191 int 192 rcop_no_set(struct rctl *r, struct proc *p, rctl_entity_p_t *e, rctl_qty_t l) 193 { 194 return (0); 195 } 196 197 /*ARGSUSED*/ 198 int 199 rcop_no_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 200 struct rctl_val *rv, rctl_qty_t i, uint_t f) 201 { 202 return (0); 203 } 204 205 rctl_ops_t rctl_default_ops = { 206 rcop_no_action, 207 rcop_no_usage, 208 rcop_no_set, 209 rcop_no_test 210 }; 211 212 /* 213 * Default "absolute" resource control operation and ops vector 214 * Useful if there is no usage associated with the 215 * resource control. 
216 */ 217 /*ARGSUSED*/ 218 int 219 rcop_absolute_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 220 struct rctl_val *rv, rctl_qty_t i, uint_t f) 221 { 222 return (i > rv->rcv_value); 223 } 224 225 rctl_ops_t rctl_absolute_ops = { 226 rcop_no_action, 227 rcop_no_usage, 228 rcop_no_set, 229 rcop_absolute_test 230 }; 231 232 /*ARGSUSED*/ 233 static uint_t 234 rctl_dict_hash_by_id(void *hash_data, mod_hash_key_t key) 235 { 236 return ((uint_t)(uintptr_t)key % rctl_dict_size); 237 } 238 239 static int 240 rctl_dict_id_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 241 { 242 uint_t u1 = (uint_t)(uintptr_t)key1; 243 uint_t u2 = (uint_t)(uintptr_t)key2; 244 245 if (u1 > u2) 246 return (1); 247 248 if (u1 == u2) 249 return (0); 250 251 return (-1); 252 } 253 254 static void 255 rctl_dict_val_dtor(mod_hash_val_t val) 256 { 257 rctl_dict_entry_t *kr = (rctl_dict_entry_t *)val; 258 259 kmem_free(kr, sizeof (rctl_dict_entry_t)); 260 } 261 262 /* 263 * size_t rctl_build_name_buf() 264 * 265 * Overview 266 * rctl_build_name_buf() walks all active resource controls in the dictionary, 267 * building a buffer of continguous NUL-terminated strings. 268 * 269 * Return values 270 * The size of the buffer is returned, the passed pointer's contents are 271 * modified to that of the location of the buffer. 272 * 273 * Caller's context 274 * Caller must be in a context suitable for KM_SLEEP allocations. 275 */ 276 size_t 277 rctl_build_name_buf(char **rbufp) 278 { 279 size_t req_size, cpy_size; 280 char *rbufloc; 281 int i; 282 283 rctl_rebuild_name_buf: 284 req_size = cpy_size = 0; 285 286 /* 287 * Calculate needed buffer length. 
288 */ 289 mutex_enter(&rctl_lists_lock); 290 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 291 rctl_dict_entry_t *rde; 292 293 for (rde = rctl_lists[i]; 294 rde != NULL; 295 rde = rde->rcd_next) 296 req_size += strlen(rde->rcd_name) + 1; 297 } 298 mutex_exit(&rctl_lists_lock); 299 300 rbufloc = *rbufp = kmem_alloc(req_size, KM_SLEEP); 301 302 /* 303 * Copy rctl names into our buffer. If the copy length exceeds the 304 * allocate length (due to registration changes), stop copying, free the 305 * buffer, and start again. 306 */ 307 mutex_enter(&rctl_lists_lock); 308 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 309 rctl_dict_entry_t *rde; 310 311 for (rde = rctl_lists[i]; 312 rde != NULL; 313 rde = rde->rcd_next) { 314 size_t length = strlen(rde->rcd_name) + 1; 315 316 cpy_size += length; 317 318 if (cpy_size > req_size) { 319 kmem_free(*rbufp, req_size); 320 mutex_exit(&rctl_lists_lock); 321 goto rctl_rebuild_name_buf; 322 } 323 324 bcopy(rde->rcd_name, rbufloc, length); 325 rbufloc += length; 326 } 327 } 328 mutex_exit(&rctl_lists_lock); 329 330 return (req_size); 331 } 332 333 /* 334 * rctl_dict_entry_t *rctl_dict_lookup(const char *) 335 * 336 * Overview 337 * rctl_dict_lookup() returns the resource control dictionary entry for the 338 * named resource control. 339 * 340 * Return values 341 * A pointer to the appropriate resource control dictionary entry, or NULL if 342 * no such named entry exists. 343 * 344 * Caller's context 345 * Caller must not be holding rctl_dict_lock. 
346 */ 347 rctl_dict_entry_t * 348 rctl_dict_lookup(const char *name) 349 { 350 rctl_dict_entry_t *rde; 351 352 mutex_enter(&rctl_dict_lock); 353 354 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 355 (mod_hash_val_t *)&rde) == MH_ERR_NOTFOUND) { 356 mutex_exit(&rctl_dict_lock); 357 return (NULL); 358 } 359 360 mutex_exit(&rctl_dict_lock); 361 362 return (rde); 363 } 364 365 /* 366 * rctl_hndl_t rctl_hndl_lookup(const char *) 367 * 368 * Overview 369 * rctl_hndl_lookup() returns the resource control id (the "handle") for the 370 * named resource control. 371 * 372 * Return values 373 * The appropriate id, or -1 if no such named entry exists. 374 * 375 * Caller's context 376 * Caller must not be holding rctl_dict_lock. 377 */ 378 rctl_hndl_t 379 rctl_hndl_lookup(const char *name) 380 { 381 rctl_dict_entry_t *rde; 382 383 if ((rde = rctl_dict_lookup(name)) == NULL) 384 return (-1); 385 386 return (rde->rcd_id); 387 } 388 389 /* 390 * rctl_dict_entry_t * rctl_dict_lookup_hndl(rctl_hndl_t) 391 * 392 * Overview 393 * rctl_dict_lookup_hndl() completes the public lookup functions, by returning 394 * the resource control dictionary entry matching a given resource control id. 395 * 396 * Return values 397 * A pointer to the matching resource control dictionary entry, or NULL if the 398 * id does not match any existing entries. 399 * 400 * Caller's context 401 * Caller must not be holding rctl_lists_lock. 
402 */ 403 rctl_dict_entry_t * 404 rctl_dict_lookup_hndl(rctl_hndl_t hndl) 405 { 406 uint_t i; 407 408 mutex_enter(&rctl_lists_lock); 409 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 410 rctl_dict_entry_t *rde; 411 412 for (rde = rctl_lists[i]; 413 rde != NULL; 414 rde = rde->rcd_next) 415 if (rde->rcd_id == hndl) { 416 mutex_exit(&rctl_lists_lock); 417 return (rde); 418 } 419 } 420 mutex_exit(&rctl_lists_lock); 421 422 return (NULL); 423 } 424 425 /* 426 * void rctl_add_default_limit(const char *name, rctl_qty_t value, 427 * rctl_priv_t privilege, uint_t action) 428 * 429 * Overview 430 * Create a default limit with specified value, privilege, and action. 431 * 432 * Return value 433 * No value returned. 434 */ 435 void 436 rctl_add_default_limit(const char *name, rctl_qty_t value, 437 rctl_priv_t privilege, uint_t action) 438 { 439 rctl_val_t *dval; 440 rctl_dict_entry_t *rde; 441 442 dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 443 bzero(dval, sizeof (rctl_val_t)); 444 dval->rcv_value = value; 445 dval->rcv_privilege = privilege; 446 dval->rcv_flagaction = action; 447 dval->rcv_action_recip_pid = -1; 448 449 rde = rctl_dict_lookup(name); 450 (void) rctl_val_list_insert(&rde->rcd_default_value, dval); 451 } 452 453 /* 454 * void rctl_add_legacy_limit(const char *name, const char *mname, 455 * const char *lname, rctl_qty_t dflt) 456 * 457 * Overview 458 * Create a default privileged limit, using the value obtained from 459 * /etc/system if it exists and is greater than the specified default 460 * value. Exists primarily for System V IPC. 461 * 462 * Return value 463 * No value returned. 
464 */ 465 void 466 rctl_add_legacy_limit(const char *name, const char *mname, const char *lname, 467 rctl_qty_t dflt, rctl_qty_t max) 468 { 469 rctl_qty_t qty; 470 471 if (!mod_sysvar(mname, lname, &qty) || (qty < dflt)) 472 qty = dflt; 473 474 if (qty > max) 475 qty = max; 476 477 rctl_add_default_limit(name, qty, RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY); 478 } 479 480 static rctl_set_t * 481 rctl_entity_obtain_rset(rctl_dict_entry_t *rcd, struct proc *p) 482 { 483 rctl_set_t *rset = NULL; 484 485 if (rcd == NULL) 486 return (NULL); 487 488 switch (rcd->rcd_entity) { 489 case RCENTITY_PROCESS: 490 rset = p->p_rctls; 491 break; 492 case RCENTITY_TASK: 493 ASSERT(MUTEX_HELD(&p->p_lock)); 494 if (p->p_task != NULL) 495 rset = p->p_task->tk_rctls; 496 break; 497 case RCENTITY_PROJECT: 498 ASSERT(MUTEX_HELD(&p->p_lock)); 499 if (p->p_task != NULL && 500 p->p_task->tk_proj != NULL) 501 rset = p->p_task->tk_proj->kpj_rctls; 502 break; 503 case RCENTITY_ZONE: 504 ASSERT(MUTEX_HELD(&p->p_lock)); 505 if (p->p_zone != NULL) 506 rset = p->p_zone->zone_rctls; 507 break; 508 default: 509 panic("unknown rctl entity type %d seen", rcd->rcd_entity); 510 break; 511 } 512 513 return (rset); 514 } 515 516 static void 517 rctl_entity_obtain_entity_p(rctl_entity_t entity, struct proc *p, 518 rctl_entity_p_t *e) 519 { 520 e->rcep_p.proc = NULL; 521 e->rcep_t = entity; 522 523 switch (entity) { 524 case RCENTITY_PROCESS: 525 e->rcep_p.proc = p; 526 break; 527 case RCENTITY_TASK: 528 ASSERT(MUTEX_HELD(&p->p_lock)); 529 if (p->p_task != NULL) 530 e->rcep_p.task = p->p_task; 531 break; 532 case RCENTITY_PROJECT: 533 ASSERT(MUTEX_HELD(&p->p_lock)); 534 if (p->p_task != NULL && 535 p->p_task->tk_proj != NULL) 536 e->rcep_p.proj = p->p_task->tk_proj; 537 break; 538 case RCENTITY_ZONE: 539 ASSERT(MUTEX_HELD(&p->p_lock)); 540 if (p->p_zone != NULL) 541 e->rcep_p.zone = p->p_zone; 542 break; 543 default: 544 panic("unknown rctl entity type %d seen", entity); 545 break; 546 } 547 } 548 549 static void 
550 rctl_gp_alloc(rctl_alloc_gp_t *rcgp) 551 { 552 uint_t i; 553 554 if (rcgp->rcag_nctls > 0) { 555 rctl_t *prev = kmem_cache_alloc(rctl_cache, KM_SLEEP); 556 rctl_t *rctl = prev; 557 558 rcgp->rcag_ctls = prev; 559 560 for (i = 1; i < rcgp->rcag_nctls; i++) { 561 rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 562 prev->rc_next = rctl; 563 prev = rctl; 564 } 565 566 rctl->rc_next = NULL; 567 } 568 569 if (rcgp->rcag_nvals > 0) { 570 rctl_val_t *prev = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 571 rctl_val_t *rval = prev; 572 573 rcgp->rcag_vals = prev; 574 575 for (i = 1; i < rcgp->rcag_nvals; i++) { 576 rval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 577 prev->rcv_next = rval; 578 prev = rval; 579 } 580 581 rval->rcv_next = NULL; 582 } 583 584 } 585 586 static rctl_val_t * 587 rctl_gp_detach_val(rctl_alloc_gp_t *rcgp) 588 { 589 rctl_val_t *rval = rcgp->rcag_vals; 590 591 ASSERT(rcgp->rcag_nvals > 0); 592 rcgp->rcag_nvals--; 593 rcgp->rcag_vals = rval->rcv_next; 594 595 rval->rcv_next = NULL; 596 597 return (rval); 598 } 599 600 static rctl_t * 601 rctl_gp_detach_ctl(rctl_alloc_gp_t *rcgp) 602 { 603 rctl_t *rctl = rcgp->rcag_ctls; 604 605 ASSERT(rcgp->rcag_nctls > 0); 606 rcgp->rcag_nctls--; 607 rcgp->rcag_ctls = rctl->rc_next; 608 609 rctl->rc_next = NULL; 610 611 return (rctl); 612 613 } 614 615 static void 616 rctl_gp_free(rctl_alloc_gp_t *rcgp) 617 { 618 rctl_val_t *rval = rcgp->rcag_vals; 619 rctl_t *rctl = rcgp->rcag_ctls; 620 621 while (rval != NULL) { 622 rctl_val_t *next = rval->rcv_next; 623 624 kmem_cache_free(rctl_val_cache, rval); 625 rval = next; 626 } 627 628 while (rctl != NULL) { 629 rctl_t *next = rctl->rc_next; 630 631 kmem_cache_free(rctl_cache, rctl); 632 rctl = next; 633 } 634 } 635 636 /* 637 * void rctl_prealloc_destroy(rctl_alloc_gp_t *) 638 * 639 * Overview 640 * Release all unused memory allocated via one of the "prealloc" functions: 641 * rctl_set_init_prealloc, rctl_set_dup_prealloc, or rctl_rlimit_set_prealloc. 
642 * 643 * Return values 644 * None. 645 * 646 * Caller's context 647 * No restrictions on context. 648 */ 649 void 650 rctl_prealloc_destroy(rctl_alloc_gp_t *gp) 651 { 652 rctl_gp_free(gp); 653 kmem_free(gp, sizeof (rctl_alloc_gp_t)); 654 } 655 656 /* 657 * int rctl_val_cmp(rctl_val_t *, rctl_val_t *, int) 658 * 659 * Overview 660 * This function defines an ordering to rctl_val_t's in order to allow 661 * for correct placement in value lists. When the imprecise flag is set, 662 * the action recipient is ignored. This is to facilitate insert, 663 * delete, and replace operations by rctlsys. 664 * 665 * Return values 666 * 0 if the val_t's are are considered identical 667 * -1 if a is ordered lower than b 668 * 1 if a is lowered higher than b 669 * 670 * Caller's context 671 * No restrictions on context. 672 */ 673 int 674 rctl_val_cmp(rctl_val_t *a, rctl_val_t *b, int imprecise) 675 { 676 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) < 677 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 678 return (-1); 679 680 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) > 681 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 682 return (1); 683 684 if (a->rcv_value < b->rcv_value) 685 return (-1); 686 687 if (a->rcv_value > b->rcv_value) 688 return (1); 689 690 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) < 691 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 692 return (-1); 693 694 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) > 695 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 696 return (1); 697 698 if (a->rcv_privilege < b->rcv_privilege) 699 return (-1); 700 701 if (a->rcv_privilege > b->rcv_privilege) 702 return (1); 703 704 if (imprecise) 705 return (0); 706 707 if (a->rcv_action_recip_pid < b->rcv_action_recip_pid) 708 return (-1); 709 710 if (a->rcv_action_recip_pid > b->rcv_action_recip_pid) 711 return (1); 712 713 return (0); 714 } 715 716 static rctl_val_t * 717 rctl_val_list_find(rctl_val_t **head, rctl_val_t *cval) 718 { 719 rctl_val_t *rval = *head; 720 721 while (rval != NULL) { 722 if 
(rctl_val_cmp(cval, rval, 0) == 0) 723 return (rval); 724 725 rval = rval->rcv_next; 726 } 727 728 return (NULL); 729 730 } 731 732 /* 733 * int rctl_val_list_insert(rctl_val_t **, rctl_val_t *) 734 * 735 * Overview 736 * This function inserts the rctl_val_t into the value list provided. 737 * The insert is always successful unless if the value is a duplicate 738 * of one already in the list. 739 * 740 * Return values 741 * 1 if the value was a duplicate of an existing value in the list. 742 * 0 if the insert was successful. 743 */ 744 int 745 rctl_val_list_insert(rctl_val_t **root, rctl_val_t *rval) 746 { 747 rctl_val_t *prev; 748 int equiv; 749 750 rval->rcv_next = NULL; 751 rval->rcv_prev = NULL; 752 753 if (*root == NULL) { 754 *root = rval; 755 return (0); 756 } 757 758 equiv = rctl_val_cmp(rval, *root, 0); 759 760 if (equiv == 0) 761 return (1); 762 763 if (equiv < 0) { 764 rval->rcv_next = *root; 765 rval->rcv_next->rcv_prev = rval; 766 *root = rval; 767 768 return (0); 769 } 770 771 prev = *root; 772 while (prev->rcv_next != NULL && 773 (equiv = rctl_val_cmp(rval, prev->rcv_next, 0)) > 0) { 774 prev = prev->rcv_next; 775 } 776 777 if (equiv == 0) 778 return (1); 779 780 rval->rcv_next = prev->rcv_next; 781 if (rval->rcv_next != NULL) 782 rval->rcv_next->rcv_prev = rval; 783 prev->rcv_next = rval; 784 rval->rcv_prev = prev; 785 786 return (0); 787 } 788 789 static int 790 rctl_val_list_delete(rctl_val_t **root, rctl_val_t *rval) 791 { 792 rctl_val_t *prev; 793 794 if (*root == NULL) 795 return (-1); 796 797 prev = *root; 798 if (rctl_val_cmp(rval, prev, 0) == 0) { 799 *root = prev->rcv_next; 800 (*root)->rcv_prev = NULL; 801 802 kmem_cache_free(rctl_val_cache, prev); 803 804 return (0); 805 } 806 807 while (prev->rcv_next != NULL && 808 rctl_val_cmp(rval, prev->rcv_next, 0) != 0) { 809 prev = prev->rcv_next; 810 } 811 812 if (prev->rcv_next == NULL) { 813 /* 814 * If we navigate the entire list and cannot find a match, then 815 * return failure. 
816 */ 817 return (-1); 818 } 819 820 prev = prev->rcv_next; 821 prev->rcv_prev->rcv_next = prev->rcv_next; 822 if (prev->rcv_next != NULL) 823 prev->rcv_next->rcv_prev = prev->rcv_prev; 824 825 kmem_cache_free(rctl_val_cache, prev); 826 827 return (0); 828 } 829 830 static rctl_val_t * 831 rctl_val_list_dup(rctl_val_t *rval, rctl_alloc_gp_t *ragp, struct proc *oldp, 832 struct proc *newp) 833 { 834 rctl_val_t *head = NULL; 835 836 for (; rval != NULL; rval = rval->rcv_next) { 837 rctl_val_t *dval = rctl_gp_detach_val(ragp); 838 839 bcopy(rval, dval, sizeof (rctl_val_t)); 840 dval->rcv_prev = dval->rcv_next = NULL; 841 842 if (oldp == NULL || 843 rval->rcv_action_recipient == NULL || 844 rval->rcv_action_recipient == oldp) { 845 if (rval->rcv_privilege == RCPRIV_BASIC) { 846 dval->rcv_action_recipient = newp; 847 dval->rcv_action_recip_pid = newp->p_pid; 848 } else { 849 dval->rcv_action_recipient = NULL; 850 dval->rcv_action_recip_pid = -1; 851 } 852 853 (void) rctl_val_list_insert(&head, dval); 854 } else { 855 kmem_cache_free(rctl_val_cache, dval); 856 } 857 } 858 859 return (head); 860 } 861 862 static void 863 rctl_val_list_reset(rctl_val_t *rval) 864 { 865 for (; rval != NULL; rval = rval->rcv_next) 866 rval->rcv_firing_time = 0; 867 } 868 869 static uint_t 870 rctl_val_list_count(rctl_val_t *rval) 871 { 872 uint_t n = 0; 873 874 for (; rval != NULL; rval = rval->rcv_next) 875 n++; 876 877 return (n); 878 } 879 880 881 static void 882 rctl_val_list_free(rctl_val_t *rval) 883 { 884 while (rval != NULL) { 885 rctl_val_t *next = rval->rcv_next; 886 887 kmem_cache_free(rctl_val_cache, rval); 888 889 rval = next; 890 } 891 } 892 893 /* 894 * rctl_qty_t rctl_model_maximum(rctl_dict_entry_t *, struct proc *) 895 * 896 * Overview 897 * In cases where the operating system supports more than one process 898 * addressing model, the operating system capabilities will exceed those of 899 * one or more of these models. 
Processes in a less capable model must have 900 * their resources accurately controlled, without diluting those of their 901 * descendants reached via exec(). rctl_model_maximum() returns the governing 902 * value for the specified process with respect to a resource control, such 903 * that the value can used for the RCTLOP_SET callback or compatability 904 * support. 905 * 906 * Return values 907 * The maximum value for the given process for the specified resource control. 908 * 909 * Caller's context 910 * No restrictions on context. 911 */ 912 rctl_qty_t 913 rctl_model_maximum(rctl_dict_entry_t *rde, struct proc *p) 914 { 915 if (p->p_model == DATAMODEL_NATIVE) 916 return (rde->rcd_max_native); 917 918 return (rde->rcd_max_ilp32); 919 } 920 921 /* 922 * rctl_qty_t rctl_model_value(rctl_dict_entry_t *, struct proc *, rctl_qty_t) 923 * 924 * Overview 925 * Convenience function wrapping the rctl_model_maximum() functionality. 926 * 927 * Return values 928 * The lesser of the process's maximum value and the given value for the 929 * specified resource control. 930 * 931 * Caller's context 932 * No restrictions on context. 933 */ 934 rctl_qty_t 935 rctl_model_value(rctl_dict_entry_t *rde, struct proc *p, rctl_qty_t value) 936 { 937 rctl_qty_t max = rctl_model_maximum(rde, p); 938 939 return (value < max ? 
value : max); 940 } 941 942 static void 943 rctl_set_insert(rctl_set_t *set, rctl_hndl_t hndl, rctl_t *rctl) 944 { 945 uint_t index = hndl % rctl_set_size; 946 rctl_t *next_ctl, *prev_ctl; 947 948 ASSERT(MUTEX_HELD(&set->rcs_lock)); 949 950 rctl->rc_next = NULL; 951 952 if (set->rcs_ctls[index] == NULL) { 953 set->rcs_ctls[index] = rctl; 954 return; 955 } 956 957 if (hndl < set->rcs_ctls[index]->rc_id) { 958 rctl->rc_next = set->rcs_ctls[index]; 959 set->rcs_ctls[index] = rctl; 960 961 return; 962 } 963 964 for (next_ctl = set->rcs_ctls[index]->rc_next, 965 prev_ctl = set->rcs_ctls[index]; 966 next_ctl != NULL; 967 prev_ctl = next_ctl, 968 next_ctl = next_ctl->rc_next) { 969 if (next_ctl->rc_id > hndl) { 970 rctl->rc_next = next_ctl; 971 prev_ctl->rc_next = rctl; 972 973 return; 974 } 975 } 976 977 rctl->rc_next = next_ctl; 978 prev_ctl->rc_next = rctl; 979 } 980 981 /* 982 * rctl_set_t *rctl_set_create() 983 * 984 * Overview 985 * Create an empty resource control set, suitable for attaching to a 986 * controlled entity. 987 * 988 * Return values 989 * A pointer to the newly created set. 990 * 991 * Caller's context 992 * Safe for KM_SLEEP allocations. 993 */ 994 rctl_set_t * 995 rctl_set_create() 996 { 997 rctl_set_t *rset = kmem_zalloc(sizeof (rctl_set_t), KM_SLEEP); 998 999 mutex_init(&rset->rcs_lock, NULL, MUTEX_DEFAULT, NULL); 1000 rset->rcs_ctls = kmem_zalloc(rctl_set_size * sizeof (rctl_t *), 1001 KM_SLEEP); 1002 rset->rcs_entity = -1; 1003 1004 return (rset); 1005 } 1006 1007 /* 1008 * rctl_gp_alloc_t *rctl_set_init_prealloc(rctl_entity_t) 1009 * 1010 * Overview 1011 * rctl_set_init_prealloc() examines the globally defined resource controls 1012 * and their default values and returns a resource control allocation group 1013 * populated with sufficient controls and values to form a representative 1014 * resource control set for the specified entity. 1015 * 1016 * Return values 1017 * A pointer to the newly created allocation group. 
1018 * 1019 * Caller's context 1020 * Caller must be in a context suitable for KM_SLEEP allocations. 1021 */ 1022 rctl_alloc_gp_t * 1023 rctl_set_init_prealloc(rctl_entity_t entity) 1024 { 1025 rctl_dict_entry_t *rde; 1026 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1027 1028 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1029 1030 if (rctl_lists[entity] == NULL) 1031 return (ragp); 1032 1033 mutex_enter(&rctl_lists_lock); 1034 1035 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1036 ragp->rcag_nctls++; 1037 ragp->rcag_nvals += rctl_val_list_count(rde->rcd_default_value); 1038 } 1039 1040 mutex_exit(&rctl_lists_lock); 1041 1042 rctl_gp_alloc(ragp); 1043 1044 return (ragp); 1045 } 1046 1047 /* 1048 * rctl_set_t *rctl_set_init(rctl_entity_t) 1049 * 1050 * Overview 1051 * rctl_set_create() creates a resource control set, initialized with the 1052 * system infinite values on all registered controls, for attachment to a 1053 * system entity requiring resource controls, such as a process or a task. 1054 * 1055 * Return values 1056 * A pointer to the newly filled set. 1057 * 1058 * Caller's context 1059 * Caller must be holding p_lock on entry so that RCTLOP_SET() functions 1060 * may modify task and project members based on the proc structure 1061 * they are passed. 
1062 */ 1063 rctl_set_t * 1064 rctl_set_init(rctl_entity_t entity, struct proc *p, rctl_entity_p_t *e, 1065 rctl_set_t *rset, rctl_alloc_gp_t *ragp) 1066 { 1067 rctl_dict_entry_t *rde; 1068 1069 ASSERT(MUTEX_HELD(&p->p_lock)); 1070 ASSERT(e); 1071 rset->rcs_entity = entity; 1072 1073 if (rctl_lists[entity] == NULL) 1074 return (rset); 1075 1076 mutex_enter(&rctl_lists_lock); 1077 mutex_enter(&rset->rcs_lock); 1078 1079 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1080 rctl_t *rctl = rctl_gp_detach_ctl(ragp); 1081 1082 rctl->rc_dict_entry = rde; 1083 rctl->rc_id = rde->rcd_id; 1084 1085 rctl->rc_values = rctl_val_list_dup(rde->rcd_default_value, 1086 ragp, NULL, p); 1087 rctl->rc_cursor = rctl->rc_values; 1088 1089 ASSERT(rctl->rc_cursor != NULL); 1090 1091 rctl_set_insert(rset, rde->rcd_id, rctl); 1092 1093 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1094 rctl->rc_cursor->rcv_value)); 1095 } 1096 1097 mutex_exit(&rset->rcs_lock); 1098 mutex_exit(&rctl_lists_lock); 1099 1100 return (rset); 1101 } 1102 1103 static rctl_t * 1104 rctl_dup(rctl_t *rctl, rctl_alloc_gp_t *ragp, struct proc *oldp, 1105 struct proc *newp) 1106 { 1107 rctl_t *dup = rctl_gp_detach_ctl(ragp); 1108 rctl_val_t *dval; 1109 1110 dup->rc_id = rctl->rc_id; 1111 dup->rc_dict_entry = rctl->rc_dict_entry; 1112 dup->rc_next = NULL; 1113 dup->rc_cursor = NULL; 1114 dup->rc_values = rctl_val_list_dup(rctl->rc_values, ragp, oldp, newp); 1115 1116 for (dval = dup->rc_values; 1117 dval != NULL; dval = dval->rcv_next) { 1118 if (rctl_val_cmp(rctl->rc_cursor, dval, 0) >= 0) { 1119 dup->rc_cursor = dval; 1120 break; 1121 } 1122 } 1123 1124 if (dup->rc_cursor == NULL) 1125 dup->rc_cursor = dup->rc_values; 1126 1127 return (dup); 1128 } 1129 1130 static void 1131 rctl_set_fill_alloc_gp(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1132 { 1133 uint_t i; 1134 1135 bzero(ragp, sizeof (rctl_alloc_gp_t)); 1136 1137 for (i = 0; i < rctl_set_size; i++) { 1138 rctl_t *r = 
set->rcs_ctls[i]; 1139 1140 while (r != NULL) { 1141 ragp->rcag_nctls++; 1142 1143 ragp->rcag_nvals += rctl_val_list_count(r->rc_values); 1144 1145 r = r->rc_next; 1146 } 1147 } 1148 } 1149 1150 /* 1151 * rctl_alloc_gp_t *rctl_set_dup_prealloc(rctl_set_t *) 1152 * 1153 * Overview 1154 * Given a resource control set, allocate a sufficiently large allocation 1155 * group to contain a duplicate of the set. 1156 * 1157 * Return value 1158 * A pointer to the newly created allocation group. 1159 * 1160 * Caller's context 1161 * Safe for KM_SLEEP allocations. 1162 */ 1163 rctl_alloc_gp_t * 1164 rctl_set_dup_prealloc(rctl_set_t *set) 1165 { 1166 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1167 1168 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1169 1170 mutex_enter(&set->rcs_lock); 1171 rctl_set_fill_alloc_gp(set, ragp); 1172 mutex_exit(&set->rcs_lock); 1173 1174 rctl_gp_alloc(ragp); 1175 1176 return (ragp); 1177 } 1178 1179 /* 1180 * int rctl_set_dup_ready(rctl_set_t *, rctl_alloc_gp_t *) 1181 * 1182 * Overview 1183 * Verify that the allocation group provided is large enough to allow a 1184 * duplicate of the given resource control set to be constructed from its 1185 * contents. 1186 * 1187 * Return values 1188 * 1 if the allocation group is sufficiently large, 0 otherwise. 1189 * 1190 * Caller's context 1191 * rcs_lock must be held prior to entry. 1192 */ 1193 int 1194 rctl_set_dup_ready(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1195 { 1196 rctl_alloc_gp_t curr_gp; 1197 1198 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1199 1200 rctl_set_fill_alloc_gp(set, &curr_gp); 1201 1202 if (curr_gp.rcag_nctls <= ragp->rcag_nctls && 1203 curr_gp.rcag_nvals <= ragp->rcag_nvals) 1204 return (1); 1205 1206 return (0); 1207 } 1208 1209 /* 1210 * rctl_set_t *rctl_set_dup(rctl_set_t *, struct proc *, struct proc *, 1211 * rctl_set_t *, rctl_alloc_gp_t *, int) 1212 * 1213 * Overview 1214 * Make a duplicate of the resource control set. 
The proc pointers are those 1215 * of the owning process and of the process associated with the entity 1216 * receiving the duplicate. 1217 * 1218 * Duplication is a 3 stage process. Stage 1 is memory allocation for 1219 * the duplicate set, which is taken care of by rctl_set_dup_prealloc(). 1220 * Stage 2 consists of copying all rctls and values from the old set into 1221 * the new. Stage 3 completes the duplication by performing the appropriate 1222 * callbacks for each rctl in the new set. 1223 * 1224 * Stages 2 and 3 are handled by calling rctl_set_dup with the RCD_DUP and 1225 * RCD_CALLBACK functions, respectively. The RCD_CALLBACK flag may only 1226 * be supplied if the newp proc structure reflects the new task and 1227 * project linkage. 1228 * 1229 * Return value 1230 * A pointer to the duplicate set. 1231 * 1232 * Caller's context 1233 * The rcs_lock of the set to be duplicated must be held prior to entry. 1234 */ 1235 rctl_set_t * 1236 rctl_set_dup(rctl_set_t *set, struct proc *oldp, struct proc *newp, 1237 rctl_entity_p_t *e, rctl_set_t *dup, rctl_alloc_gp_t *ragp, int flag) 1238 { 1239 uint_t i; 1240 rctl_set_t *iter; 1241 1242 ASSERT((flag & RCD_DUP) || (flag & RCD_CALLBACK)); 1243 ASSERT(e); 1244 /* 1245 * When copying the old set, iterate over that. Otherwise, when 1246 * only callbacks have been requested, iterate over the dup set. 
1247 */ 1248 if (flag & RCD_DUP) { 1249 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1250 iter = set; 1251 dup->rcs_entity = set->rcs_entity; 1252 } else { 1253 iter = dup; 1254 } 1255 1256 mutex_enter(&dup->rcs_lock); 1257 1258 for (i = 0; i < rctl_set_size; i++) { 1259 rctl_t *r = iter->rcs_ctls[i]; 1260 rctl_t *d; 1261 1262 while (r != NULL) { 1263 if (flag & RCD_DUP) { 1264 d = rctl_dup(r, ragp, oldp, newp); 1265 rctl_set_insert(dup, r->rc_id, d); 1266 } else { 1267 d = r; 1268 } 1269 1270 if (flag & RCD_CALLBACK) 1271 RCTLOP_SET(d, newp, e, 1272 rctl_model_value(d->rc_dict_entry, newp, 1273 d->rc_cursor->rcv_value)); 1274 1275 r = r->rc_next; 1276 } 1277 } 1278 1279 mutex_exit(&dup->rcs_lock); 1280 1281 return (dup); 1282 } 1283 1284 /* 1285 * void rctl_set_free(rctl_set_t *) 1286 * 1287 * Overview 1288 * Delete resource control set and all attached values. 1289 * 1290 * Return values 1291 * No value returned. 1292 * 1293 * Caller's context 1294 * No restrictions on context. 1295 */ 1296 void 1297 rctl_set_free(rctl_set_t *set) 1298 { 1299 uint_t i; 1300 1301 mutex_enter(&set->rcs_lock); 1302 for (i = 0; i < rctl_set_size; i++) { 1303 rctl_t *r = set->rcs_ctls[i]; 1304 1305 while (r != NULL) { 1306 rctl_val_t *v = r->rc_values; 1307 rctl_t *n = r->rc_next; 1308 1309 kmem_cache_free(rctl_cache, r); 1310 1311 rctl_val_list_free(v); 1312 1313 r = n; 1314 } 1315 } 1316 mutex_exit(&set->rcs_lock); 1317 1318 kmem_free(set->rcs_ctls, sizeof (rctl_t *) * rctl_set_size); 1319 kmem_free(set, sizeof (rctl_set_t)); 1320 } 1321 1322 /* 1323 * void rctl_set_reset(rctl_set_t *) 1324 * 1325 * Overview 1326 * Resets all rctls within the set such that the lowest value becomes active. 1327 * 1328 * Return values 1329 * No value returned. 1330 * 1331 * Caller's context 1332 * No restrictions on context. 
1333 */ 1334 void 1335 rctl_set_reset(rctl_set_t *set, struct proc *p, rctl_entity_p_t *e) 1336 { 1337 uint_t i; 1338 1339 ASSERT(e); 1340 1341 mutex_enter(&set->rcs_lock); 1342 for (i = 0; i < rctl_set_size; i++) { 1343 rctl_t *r = set->rcs_ctls[i]; 1344 1345 while (r != NULL) { 1346 r->rc_cursor = r->rc_values; 1347 rctl_val_list_reset(r->rc_cursor); 1348 RCTLOP_SET(r, p, e, rctl_model_value(r->rc_dict_entry, 1349 p, r->rc_cursor->rcv_value)); 1350 1351 ASSERT(r->rc_cursor != NULL); 1352 1353 r = r->rc_next; 1354 } 1355 } 1356 1357 mutex_exit(&set->rcs_lock); 1358 } 1359 1360 /* 1361 * void rctl_set_tearoff(rctl_set *, struct proc *) 1362 * 1363 * Overview 1364 * Tear off any resource control values on this set with an action recipient 1365 * equal to the specified process (as they are becoming invalid with the 1366 * process's departure from this set as an observer). 1367 * 1368 * Return values 1369 * No value returned. 1370 * 1371 * Caller's context 1372 * No restrictions on context 1373 */ 1374 void 1375 rctl_set_tearoff(rctl_set_t *set, struct proc *p) 1376 { 1377 uint_t i; 1378 1379 mutex_enter(&set->rcs_lock); 1380 for (i = 0; i < rctl_set_size; i++) { 1381 rctl_t *r = set->rcs_ctls[i]; 1382 1383 while (r != NULL) { 1384 rctl_val_t *rval; 1385 1386 tearoff_rewalk_list: 1387 rval = r->rc_values; 1388 1389 while (rval != NULL) { 1390 if (rval->rcv_privilege == RCPRIV_BASIC && 1391 rval->rcv_action_recipient == p) { 1392 if (r->rc_cursor == rval) 1393 r->rc_cursor = rval->rcv_next; 1394 1395 (void) rctl_val_list_delete( 1396 &r->rc_values, rval); 1397 1398 goto tearoff_rewalk_list; 1399 } 1400 1401 rval = rval->rcv_next; 1402 } 1403 1404 ASSERT(r->rc_cursor != NULL); 1405 1406 r = r->rc_next; 1407 } 1408 } 1409 1410 mutex_exit(&set->rcs_lock); 1411 } 1412 1413 static int 1414 rctl_set_find(rctl_set_t *set, rctl_hndl_t hndl, rctl_t **rctl) 1415 { 1416 uint_t index = hndl % rctl_set_size; 1417 rctl_t *curr_ctl; 1418 1419 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1420 
1421 for (curr_ctl = set->rcs_ctls[index]; curr_ctl != NULL; 1422 curr_ctl = curr_ctl->rc_next) { 1423 if (curr_ctl->rc_id == hndl) { 1424 *rctl = curr_ctl; 1425 1426 return (0); 1427 } 1428 } 1429 1430 return (-1); 1431 } 1432 1433 /* 1434 * rlim64_t rctl_enforced_value(rctl_hndl_t, rctl_set_t *, struct proc *) 1435 * 1436 * Overview 1437 * Given a process, get the next enforced value on the rctl of the specified 1438 * handle. 1439 * 1440 * Return value 1441 * The enforced value. 1442 * 1443 * Caller's context 1444 * For controls on process collectives, p->p_lock must be held across the 1445 * operation. 1446 */ 1447 /*ARGSUSED*/ 1448 rctl_qty_t 1449 rctl_enforced_value(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p) 1450 { 1451 rctl_t *rctl; 1452 rlim64_t ret; 1453 1454 mutex_enter(&rset->rcs_lock); 1455 1456 if (rctl_set_find(rset, hndl, &rctl) == -1) 1457 panic("unknown resource control handle %d requested", hndl); 1458 else 1459 ret = rctl_model_value(rctl->rc_dict_entry, p, 1460 rctl->rc_cursor->rcv_value); 1461 1462 mutex_exit(&rset->rcs_lock); 1463 1464 return (ret); 1465 } 1466 1467 /* 1468 * int rctl_global_get(const char *, rctl_dict_entry_t *) 1469 * 1470 * Overview 1471 * Copy a sanitized version of the global rctl for a given resource control 1472 * name. (By sanitization, we mean that the unsafe data pointers have been 1473 * zeroed.) 1474 * 1475 * Return value 1476 * -1 if name not defined, 0 otherwise. 1477 * 1478 * Caller's context 1479 * No restrictions on context. rctl_dict_lock must not be held. 
1480 */ 1481 int 1482 rctl_global_get(const char *name, rctl_dict_entry_t *drde) 1483 { 1484 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1485 1486 if (rde == NULL) 1487 return (-1); 1488 1489 bcopy(rde, drde, sizeof (rctl_dict_entry_t)); 1490 1491 drde->rcd_next = NULL; 1492 drde->rcd_ops = NULL; 1493 1494 return (0); 1495 } 1496 1497 /* 1498 * int rctl_global_set(const char *, rctl_dict_entry_t *) 1499 * 1500 * Overview 1501 * Transfer the settable fields of the named rctl to the global rctl matching 1502 * the given resource control name. 1503 * 1504 * Return value 1505 * -1 if name not defined, 0 otherwise. 1506 * 1507 * Caller's context 1508 * No restrictions on context. rctl_dict_lock must not be held. 1509 */ 1510 int 1511 rctl_global_set(const char *name, rctl_dict_entry_t *drde) 1512 { 1513 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1514 1515 if (rde == NULL) 1516 return (-1); 1517 1518 rde->rcd_flagaction = drde->rcd_flagaction; 1519 rde->rcd_syslog_level = drde->rcd_syslog_level; 1520 rde->rcd_strlog_flags = drde->rcd_strlog_flags; 1521 1522 return (0); 1523 } 1524 1525 static int 1526 rctl_local_op(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1527 int (*cbop)(rctl_hndl_t, struct proc *p, rctl_entity_p_t *e, rctl_t *, 1528 rctl_val_t *, rctl_val_t *), struct proc *p) 1529 { 1530 rctl_t *rctl; 1531 rctl_set_t *rset; 1532 rctl_entity_p_t e; 1533 int ret = 0; 1534 rctl_dict_entry_t *rde = rctl_dict_lookup_hndl(hndl); 1535 1536 local_op_retry: 1537 1538 ASSERT(MUTEX_HELD(&p->p_lock)); 1539 1540 rset = rctl_entity_obtain_rset(rde, p); 1541 1542 if (rset == NULL) { 1543 return (-1); 1544 } 1545 rctl_entity_obtain_entity_p(rset->rcs_entity, p, &e); 1546 1547 mutex_enter(&rset->rcs_lock); 1548 1549 /* using rctl's hndl, get rctl from local set */ 1550 if (rctl_set_find(rset, hndl, &rctl) == -1) { 1551 mutex_exit(&rset->rcs_lock); 1552 return (-1); 1553 } 1554 1555 ret = cbop(hndl, p, &e, rctl, oval, nval); 1556 1557 
mutex_exit(&rset->rcs_lock); 1558 return (ret); 1559 } 1560 1561 /*ARGSUSED*/ 1562 static int 1563 rctl_local_get_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1564 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1565 { 1566 if (oval == NULL) { 1567 /* 1568 * RCTL_FIRST 1569 */ 1570 bcopy(rctl->rc_values, nval, sizeof (rctl_val_t)); 1571 } else { 1572 /* 1573 * RCTL_NEXT 1574 */ 1575 rctl_val_t *tval = rctl_val_list_find(&rctl->rc_values, oval); 1576 1577 if (tval == NULL) 1578 return (ESRCH); 1579 else if (tval->rcv_next == NULL) 1580 return (ENOENT); 1581 else 1582 bcopy(tval->rcv_next, nval, sizeof (rctl_val_t)); 1583 } 1584 1585 return (0); 1586 } 1587 1588 /* 1589 * int rctl_local_get(rctl_hndl_t, rctl_val_t *) 1590 * 1591 * Overview 1592 * Get the rctl value for the given flags. 1593 * 1594 * Return values 1595 * 0 for successful get, errno otherwise. 1596 */ 1597 int 1598 rctl_local_get(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1599 struct proc *p) 1600 { 1601 return (rctl_local_op(hndl, oval, nval, rctl_local_get_cb, p)); 1602 } 1603 1604 /*ARGSUSED*/ 1605 static int 1606 rctl_local_delete_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1607 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1608 { 1609 if ((oval = rctl_val_list_find(&rctl->rc_values, nval)) == NULL) 1610 return (ESRCH); 1611 1612 if (rctl->rc_cursor == oval) { 1613 rctl->rc_cursor = oval->rcv_next; 1614 rctl_val_list_reset(rctl->rc_cursor); 1615 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1616 rctl->rc_cursor->rcv_value)); 1617 1618 ASSERT(rctl->rc_cursor != NULL); 1619 } 1620 1621 (void) rctl_val_list_delete(&rctl->rc_values, oval); 1622 1623 return (0); 1624 } 1625 1626 /* 1627 * int rctl_local_delete(rctl_hndl_t, rctl_val_t *) 1628 * 1629 * Overview 1630 * Delete the rctl value for the given flags. 1631 * 1632 * Return values 1633 * 0 for successful delete, errno otherwise. 
1634 */ 1635 int 1636 rctl_local_delete(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1637 { 1638 return (rctl_local_op(hndl, NULL, val, rctl_local_delete_cb, p)); 1639 } 1640 1641 /* 1642 * rctl_local_insert_cb() 1643 * 1644 * Overview 1645 * Insert a new value into the rctl's val list. If an error occurs, 1646 * the val list must be left in the same state as when the function 1647 * was entered. 1648 * 1649 * Return Values 1650 * 0 for successful insert, EINVAL if the value is duplicated in the 1651 * existing list. 1652 */ 1653 /*ARGSUSED*/ 1654 static int 1655 rctl_local_insert_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1656 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1657 { 1658 /* 1659 * Before inserting, confirm there are no duplicates of this value 1660 * and flag level. If there is a duplicate, flag an error and do 1661 * nothing. 1662 */ 1663 if (rctl_val_list_insert(&rctl->rc_values, nval) != 0) 1664 return (EINVAL); 1665 1666 if (rctl_val_cmp(nval, rctl->rc_cursor, 0) < 0) { 1667 rctl->rc_cursor = nval; 1668 rctl_val_list_reset(rctl->rc_cursor); 1669 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1670 rctl->rc_cursor->rcv_value)); 1671 1672 ASSERT(rctl->rc_cursor != NULL); 1673 } 1674 1675 return (0); 1676 } 1677 1678 /* 1679 * int rctl_local_insert(rctl_hndl_t, rctl_val_t *) 1680 * 1681 * Overview 1682 * Insert the rctl value into the appropriate rctl set for the calling 1683 * process, given the handle. 1684 */ 1685 int 1686 rctl_local_insert(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1687 { 1688 return (rctl_local_op(hndl, NULL, val, rctl_local_insert_cb, p)); 1689 } 1690 1691 static int 1692 rctl_local_replace_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1693 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1694 { 1695 int ret; 1696 1697 /* 1698 * rctl_local_insert_cb() does the job of flagging an error 1699 * for any duplicate values. 
So, call rctl_local_insert_cb() 1700 * for the new value first, then do deletion of the old value. 1701 * Since this is a callback function to rctl_local_op, we can 1702 * count on rcs_lock being held at this point. This guarantees 1703 * that there is at no point a visible list which contains both 1704 * new and old values. 1705 */ 1706 if (ret = rctl_local_insert_cb(hndl, p, e, rctl, NULL, nval)) 1707 return (ret); 1708 1709 return (rctl_local_delete_cb(hndl, p, e, rctl, NULL, oval)); 1710 } 1711 1712 /* 1713 * int rctl_local_replace(rctl_hndl_t, void *, int, uint64_t *) 1714 * 1715 * Overview 1716 * Replace the rctl value with a new one. 1717 * 1718 * Return values 1719 * 0 for successful replace, errno otherwise. 1720 */ 1721 int 1722 rctl_local_replace(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1723 struct proc *p) 1724 { 1725 return (rctl_local_op(hndl, oval, nval, rctl_local_replace_cb, p)); 1726 } 1727 1728 /* 1729 * int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *) 1730 * 1731 * Overview 1732 * To support rlimit compatibility, we need a function which takes a 64-bit 1733 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 1734 * This operation is only intended for legacy rlimits. 1735 */ 1736 int 1737 rctl_rlimit_get(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64) 1738 { 1739 rctl_t *rctl; 1740 rctl_val_t *rval; 1741 rctl_set_t *rset = p->p_rctls; 1742 int soft_limit_seen = 0; 1743 int test_for_deny = 1; 1744 1745 mutex_enter(&rset->rcs_lock); 1746 if (rctl_set_find(rset, rc, &rctl) == -1) { 1747 mutex_exit(&rset->rcs_lock); 1748 return (-1); 1749 } 1750 1751 rval = rctl->rc_values; 1752 1753 if (rctl->rc_dict_entry->rcd_flagaction & (RCTL_GLOBAL_DENY_NEVER | 1754 RCTL_GLOBAL_DENY_ALWAYS)) 1755 test_for_deny = 0; 1756 1757 /* 1758 * 1. Find the first control value with the RCTL_LOCAL_DENY bit set. 
1759 */ 1760 while (rval != NULL && rval->rcv_privilege != RCPRIV_SYSTEM) { 1761 if (test_for_deny && 1762 (rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0) { 1763 rval = rval->rcv_next; 1764 continue; 1765 } 1766 1767 /* 1768 * 2. If this is an RCPRIV_BASIC value, then we've found the 1769 * effective soft limit and should set rlim_cur. We should then 1770 * continue looking for another control value with the DENY bit 1771 * set. 1772 */ 1773 if (rval->rcv_privilege == RCPRIV_BASIC) { 1774 if (soft_limit_seen) { 1775 rval = rval->rcv_next; 1776 continue; 1777 } 1778 1779 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 1780 rval->rcv_value < rctl_model_maximum( 1781 rctl->rc_dict_entry, p)) 1782 rlp64->rlim_cur = rval->rcv_value; 1783 else 1784 rlp64->rlim_cur = RLIM64_INFINITY; 1785 soft_limit_seen = 1; 1786 1787 rval = rval->rcv_next; 1788 continue; 1789 } 1790 1791 /* 1792 * 3. This is an RCPRIV_PRIVILEGED value. If we haven't found 1793 * a soft limit candidate, then we've found the effective hard 1794 * and soft limits and should set both If we had found a soft 1795 * limit, then this is only the hard limit and we need only set 1796 * rlim_max. 1797 */ 1798 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 1799 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, 1800 p)) 1801 rlp64->rlim_max = rval->rcv_value; 1802 else 1803 rlp64->rlim_max = RLIM64_INFINITY; 1804 if (!soft_limit_seen) 1805 rlp64->rlim_cur = rlp64->rlim_max; 1806 1807 mutex_exit(&rset->rcs_lock); 1808 return (0); 1809 } 1810 1811 if (rval == NULL) { 1812 /* 1813 * This control sequence is corrupt, as it is not terminated by 1814 * a system privileged control value. 1815 */ 1816 mutex_exit(&rset->rcs_lock); 1817 return (-1); 1818 } 1819 1820 /* 1821 * 4. If we run into a RCPRIV_SYSTEM value, then the hard limit (and 1822 * the soft, if we haven't a soft candidate) should be the value of the 1823 * system control value. 
1824 */ 1825 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 1826 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, p)) 1827 rlp64->rlim_max = rval->rcv_value; 1828 else 1829 rlp64->rlim_max = RLIM64_INFINITY; 1830 1831 if (!soft_limit_seen) 1832 rlp64->rlim_cur = rlp64->rlim_max; 1833 1834 mutex_exit(&rset->rcs_lock); 1835 return (0); 1836 } 1837 1838 /* 1839 * rctl_alloc_gp_t *rctl_rlimit_set_prealloc(uint_t) 1840 * 1841 * Overview 1842 * Before making a series of calls to rctl_rlimit_set(), we must have a 1843 * preallocated batch of resource control values, as rctl_rlimit_set() can 1844 * potentially consume two resource control values per call. 1845 * 1846 * Return values 1847 * A populated resource control allocation group with 2n resource control 1848 * values. 1849 * 1850 * Caller's context 1851 * Must be safe for KM_SLEEP allocations. 1852 */ 1853 rctl_alloc_gp_t * 1854 rctl_rlimit_set_prealloc(uint_t n) 1855 { 1856 rctl_alloc_gp_t *gp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1857 1858 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1859 1860 gp->rcag_nvals = 2 * n; 1861 1862 rctl_gp_alloc(gp); 1863 1864 return (gp); 1865 } 1866 1867 /* 1868 * int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, int, 1869 * int) 1870 * 1871 * Overview 1872 * To support rlimit compatibility, we need a function which takes a 64-bit 1873 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 1874 * This operation is only intended for legacy rlimits. 1875 * 1876 * The implementation of rctl_rlimit_set() is a bit clever, as it tries to 1877 * minimize the number of values placed on the value sequence in various 1878 * cases. Furthermore, we don't allow multiple identical privilege-action 1879 * values on the same sequence. (That is, we don't want a sequence like 1880 * "while (1) { rlim.rlim_cur++; setrlimit(..., rlim); }" to exhaust kernel 1881 * memory.) 
 *   So we want to delete any values with the same privilege value and
 *   action.
 *
 * Return values
 *   0 for successful set, errno otherwise.  Errno will be either EINVAL
 *   or EPERM, in keeping with defined errnos for ulimit() and setrlimit()
 *   system calls.
 */
/*ARGSUSED*/
int
rctl_rlimit_set(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64,
    rctl_alloc_gp_t *ragp, int flagaction, int signal, const cred_t *cr)
{
	rctl_t *rctl;
	rctl_val_t *rval, *rval_priv, *rval_basic;
	rctl_set_t *rset = p->p_rctls;
	rctl_qty_t max;
	rctl_entity_p_t e;
	struct rlimit64 cur_rl;

	/* This operation always acts on the process's own rctl set. */
	e.rcep_t = RCENTITY_PROCESS;
	e.rcep_p.proc = p;

	/* The soft limit may not exceed the hard limit. */
	if (rlp64->rlim_cur > rlp64->rlim_max)
		return (EINVAL);

	/* Fetch the current limits to compare against the request. */
	if (rctl_rlimit_get(rc, p, &cur_rl) == -1)
		return (EINVAL);

	/*
	 * If we are not privileged, we can only lower the hard limit.
	 */
	if ((rlp64->rlim_max > cur_rl.rlim_max) &&
	    cur_rl.rlim_max != RLIM64_INFINITY &&
	    secpolicy_resource(cr) != 0)
		return (EPERM);

	mutex_enter(&rset->rcs_lock);

	if (rctl_set_find(rset, rc, &rctl) == -1) {
		mutex_exit(&rset->rcs_lock);
		return (EINVAL);
	}

	/* Take the value that will carry the new hard limit. */
	rval_priv = rctl_gp_detach_val(ragp);

	rval = rctl->rc_values;

	/*
	 * Walk the values below the system value and delete every basic
	 * value, plus every value whose flags outside
	 * RCTL_LOCAL_ACTION_MASK match those of flagaction.  The successor
	 * is saved before a possible deletion.
	 */
	while (rval != NULL) {
		rctl_val_t *next = rval->rcv_next;

		if (rval->rcv_privilege == RCPRIV_SYSTEM)
			break;

		if ((rval->rcv_privilege == RCPRIV_BASIC) ||
		    (rval->rcv_flagaction & ~RCTL_LOCAL_ACTION_MASK) ==
		    (flagaction & ~RCTL_LOCAL_ACTION_MASK)) {
			/* Move the cursor off a value about to be deleted. */
			if (rctl->rc_cursor == rval) {
				rctl->rc_cursor = rval->rcv_next;
				rctl_val_list_reset(rctl->rc_cursor);
				RCTLOP_SET(rctl, p, &e, rctl_model_value(
				    rctl->rc_dict_entry, p,
				    rctl->rc_cursor->rcv_value));
			}
			(void) rctl_val_list_delete(&rctl->rc_values, rval);
		}

		rval = next;
	}

	/*
	 * Build the privileged value for the hard limit.  An infinite hard
	 * limit is encoded as the control's native maximum with
	 * RCTL_LOCAL_MAXIMAL set.
	 */
	rval_priv->rcv_privilege = RCPRIV_PRIVILEGED;
	rval_priv->rcv_flagaction = flagaction;
	if (rlp64->rlim_max == RLIM64_INFINITY) {
		rval_priv->rcv_flagaction |= RCTL_LOCAL_MAXIMAL;
		max = rctl->rc_dict_entry->rcd_max_native;
	} else {
		max = rlp64->rlim_max;
	}
	rval_priv->rcv_value = max;
	rval_priv->rcv_action_signal = signal;
	rval_priv->rcv_action_recipient = NULL;
	rval_priv->rcv_action_recip_pid = -1;
	rval_priv->rcv_firing_time = 0;
	rval_priv->rcv_prev = rval_priv->rcv_next = NULL;

	/*
	 * NOTE(review): the insert result is discarded, yet the cursor is
	 * pointed at rval_priv unconditionally.  Elsewhere in this file a
	 * non-zero return from rctl_val_list_insert() is treated as a
	 * duplicate; confirm that cannot happen here.
	 */
	(void) rctl_val_list_insert(&rctl->rc_values, rval_priv);
	rctl->rc_cursor = rval_priv;
	rctl_val_list_reset(rctl->rc_cursor);
	RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p,
	    rctl->rc_cursor->rcv_value));

	/*
	 * A finite soft limit below the hard limit gets its own basic value,
	 * with this process as action recipient; it then becomes the cursor.
	 */
	if (rlp64->rlim_cur != RLIM64_INFINITY && rlp64->rlim_cur < max) {
		rval_basic = rctl_gp_detach_val(ragp);

		rval_basic->rcv_privilege = RCPRIV_BASIC;
		rval_basic->rcv_value = rlp64->rlim_cur;
		rval_basic->rcv_flagaction = flagaction;
		rval_basic->rcv_action_signal = signal;
		rval_basic->rcv_action_recipient = p;
		rval_basic->rcv_action_recip_pid = p->p_pid;
		rval_basic->rcv_firing_time = 0;
		rval_basic->rcv_prev = rval_basic->rcv_next = NULL;

		(void) rctl_val_list_insert(&rctl->rc_values, rval_basic);
		rctl->rc_cursor = rval_basic;
		rctl_val_list_reset(rctl->rc_cursor);
		RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p,
		    rctl->rc_cursor->rcv_value));
	}

	ASSERT(rctl->rc_cursor != NULL);

	mutex_exit(&rset->rcs_lock);
	return (0);
}


/*
 * rctl_hndl_t rctl_register(const char *, rctl_entity_t, int, rlim64_t,
 *   rlim64_t, rctl_ops_t *)
 *
 * Overview
 *   rctl_register() performs a look-up in the dictionary of rctls
 *   active on the system; if a rctl of that name is absent, an entry is
 *   made into the dictionary.
 *   The rctl is returned with its reference
 *   count incremented by one.  If the rctl name already exists, we panic.
 *   (Were the resource control system to support dynamic loading and unloading,
 *   which it is structured for, duplicate registration should lead to load
 *   failure instead of panicking.)
 *
 *   Each registered rctl has a requirement that a RCPRIV_SYSTEM limit be
 *   defined.  This limit contains the highest possible value for this quantity
 *   on the system.  Furthermore, the registered control must provide infinite
 *   values for all applicable address space models supported by the operating
 *   system.  Attempts to set resource control values beyond the system limit
 *   will fail.
 *
 * Return values
 *   The rctl's ID.
 *
 * Caller's context
 *   Caller must be in a context suitable for KM_SLEEP allocations.
 */
rctl_hndl_t
rctl_register(
    const char *name,
    rctl_entity_t entity,
    int global_flags,
    rlim64_t max_native,
    rlim64_t max_ilp32,
    rctl_ops_t *ops)
{
	rctl_t *rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP);
	rctl_val_t *rctl_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
	rctl_dict_entry_t *rctl_de = kmem_zalloc(sizeof (rctl_dict_entry_t),
	    KM_SLEEP);
	rctl_t *old_rctl;
	rctl_hndl_t rhndl;
	int localflags;

	ASSERT(ops != NULL);

	/* The cache allocations are not zeroed; clear them here. */
	bzero(rctl, sizeof (rctl_t));
	bzero(rctl_val, sizeof (rctl_val_t));

	/*
	 * The default system value is always maximal; it also denies unless
	 * the control is registered as never denying.
	 */
	if (global_flags & RCTL_GLOBAL_DENY_NEVER)
		localflags = RCTL_LOCAL_MAXIMAL;
	else
		localflags = RCTL_LOCAL_MAXIMAL | RCTL_LOCAL_DENY;

	/* Build the single RCPRIV_SYSTEM default value. */
	rctl_val->rcv_privilege = RCPRIV_SYSTEM;
	rctl_val->rcv_value = max_native;
	rctl_val->rcv_flagaction = localflags;
	rctl_val->rcv_action_signal = 0;
	rctl_val->rcv_action_recipient = NULL;
	rctl_val->rcv_action_recip_pid = -1;
	rctl_val->rcv_firing_time = 0;
	rctl_val->rcv_next = NULL;
	rctl_val->rcv_prev = NULL;

	/* The name pointer is stored as given, not copied. */
	rctl_de->rcd_name = (char *)name;
	rctl_de->rcd_default_value = rctl_val;
	rctl_de->rcd_max_native = max_native;
	rctl_de->rcd_max_ilp32 = max_ilp32;
	rctl_de->rcd_entity = entity;
	rctl_de->rcd_ops = ops;
	rctl_de->rcd_flagaction = global_flags;

	rctl->rc_dict_entry = rctl_de;
	rctl->rc_values = rctl_val;

	/*
	 * 1.  Take global lock, validate nonexistence of name, get ID.
	 */
	mutex_enter(&rctl_dict_lock);

	/*
	 * NOTE(review): &rhndl (an rctl_hndl_t) is cast to mod_hash_val_t *
	 * as the output slot.  If the two types differ in size, a successful
	 * find would write past rhndl just before the panic -- confirm.
	 */
	if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name,
	    (mod_hash_val_t *)&rhndl) != MH_ERR_NOTFOUND)
		panic("duplicate registration of rctl %s", name);

	rhndl = rctl_de->rcd_id = rctl->rc_id =
	    (rctl_hndl_t)id_alloc(rctl_ids);

	/*
	 * 2.  Insert name-entry pair in rctl_dict_by_name.
	 */
	if (mod_hash_insert(rctl_dict_by_name, (mod_hash_key_t)name,
	    (mod_hash_val_t)rctl_de))
		panic("unable to insert rctl dict entry for %s (%u)", name,
		    (uint_t)rctl->rc_id);

	/*
	 * 3.  Insert ID-rctl_t * pair in rctl_dict.
	 */
	if (mod_hash_find(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id,
	    (mod_hash_val_t *)&old_rctl) != MH_ERR_NOTFOUND)
		panic("duplicate rctl ID %u registered", rctl->rc_id);

	if (mod_hash_insert(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id,
	    (mod_hash_val_t)rctl))
		panic("unable to insert rctl %s/%u (%p)", name,
		    (uint_t)rctl->rc_id, rctl);

	/*
	 * 3a. Insert rctl_dict_entry_t * in appropriate entity list.
	 */

	mutex_enter(&rctl_lists_lock);

	switch (entity) {
	case RCENTITY_ZONE:
	case RCENTITY_PROJECT:
	case RCENTITY_TASK:
	case RCENTITY_PROCESS:
		/* Push the entry on the head of its entity's list. */
		rctl_de->rcd_next = rctl_lists[entity];
		rctl_lists[entity] = rctl_de;
		break;
	default:
		panic("registering unknown rctl entity %d (%s)", entity,
		    name);
		break;
	}

	mutex_exit(&rctl_lists_lock);

	/*
	 * 4.  Drop lock.
	 */
	mutex_exit(&rctl_dict_lock);

	return (rhndl);
}

/*
 * static int rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p,
 *    rctl_val_t *v)
 *
 * Overview
 *   rctl_global_action() takes, in accordance with the flags on the rctl_dict
 *   entry for the given control, the appropriate actions on the exceeded
 *   control value.  Additionally, rctl_global_action() updates the firing time
 *   on the exceeded value.
 *
 * Return values
 *   A bitmask reflecting the actions actually taken.
 *
 * Caller's context
 *   No restrictions on context.
 */
/*ARGSUSED*/
static int
rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v)
{
	rctl_dict_entry_t *rde = r->rc_dict_entry;
	const char *pr, *en;
	id_t id;
	int ret = 0;

	v->rcv_firing_time = gethrtime();

	/* Privilege level name, used only in the log message. */
	switch (v->rcv_privilege) {
	case RCPRIV_BASIC:
		pr = "basic";
		break;
	case RCPRIV_PRIVILEGED:
		pr = "privileged";
		break;
	case RCPRIV_SYSTEM:
		pr = "system";
		break;
	default:
		pr = "unknown";
		break;
	}

	/* Entity name and id, used only in the log message. */
	switch (rde->rcd_entity) {
	case RCENTITY_PROCESS:
		en = "process";
		id = p->p_pid;
		break;
	case RCENTITY_TASK:
		en = "task";
		id = p->p_task->tk_tkid;
		break;
	case RCENTITY_PROJECT:
		en = "project";
		id = p->p_task->tk_proj->kpj_id;
		break;
	case RCENTITY_ZONE:
		en = "zone";
		id = p->p_zone->zone_id;
		break;
	default:
		en = "unknown entity associated with pid";
		id = p->p_pid;
		break;
	}

	if (rde->rcd_flagaction & RCTL_GLOBAL_SYSLOG) {
		(void) strlog(0, 0, 0,
		    rde->rcd_strlog_flags | log_global.lz_active,
		    "%s rctl %s (value %llu) exceeded by %s %d", pr,
		    rde->rcd_name, v->rcv_value, en, id);
	}

	if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS)
		ret |= RCT_DENY;

	return (ret);
}

/*
 * Take the local (per-value) actions for value v of control r: signal
 * delivery and/or denial, as selected by v's flags.
 *
 * With safety == RCA_UNSAFE_ALL only the deny flag is evaluated.  With
 * safety == RCA_SAFE, rcs_lock and p_lock are dropped and retaken around
 * a sleeping sigqueue_t allocation.
 *
 * Return values
 *   A bitmask of RCT_DENY, RCT_SIGNAL and RCT_LK_ABANDONED.  When
 *   RCT_LK_ABANDONED is set, this function returns with p_lock held but
 *   rset->rcs_lock not held.
 *
 * Caller's context
 *   NOTE(review): presumably entered with both p->p_lock and
 *   rset->rcs_lock held, since both are exited below -- confirm.
 */
static int
rctl_local_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v,
    uint_t safety)
{
	int ret = 0;
	sigqueue_t *sqp = NULL;
	rctl_dict_entry_t *rde = r->rc_dict_entry;
	int unobservable = (rde->rcd_flagaction & RCTL_GLOBAL_UNOBSERVABLE);

	/* Work from copies of v's fields; locks may be dropped below. */
	proc_t *recipient = v->rcv_action_recipient;
	id_t recip_pid = v->rcv_action_recip_pid;
	int recip_signal = v->rcv_action_signal;
	uint_t flagaction = v->rcv_flagaction;

	if (safety == RCA_UNSAFE_ALL) {
		if (flagaction & RCTL_LOCAL_DENY) {
			ret |= RCT_DENY;
		}
		return (ret);
	}

	if (flagaction & RCTL_LOCAL_SIGNAL) {
		/*
		 * We can build a siginfo only in the case that it is
		 * safe for us to drop p_lock.  (For asynchronous
		 * checks this is currently not true.)
		 */
		if (safety == RCA_SAFE) {
			mutex_exit(&rset->rcs_lock);
			mutex_exit(&p->p_lock);
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
			mutex_enter(&p->p_lock);
			mutex_enter(&rset->rcs_lock);

			sqp->sq_info.si_signo = recip_signal;
			sqp->sq_info.si_code = SI_RCTL;
			sqp->sq_info.si_errno = 0;
			sqp->sq_info.si_entity = (int)rde->rcd_entity;
		}

		if (recipient == NULL || recipient == p) {
			/* The violating process itself receives the signal. */
			ret |= RCT_SIGNAL;

			if (sqp == NULL) {
				sigtoproc(p, NULL, recip_signal);
			} else if (p == curproc) {
				/*
				 * Then this is a synchronous test and we can
				 * direct the signal at the violating thread.
				 */
				sigaddqa(curproc, curthread, sqp);
			} else {
				sigaddqa(p, NULL, sqp);
			}
		} else if (!unobservable) {
			proc_t *rp;

			/* Drop our locks before taking pidlock. */
			mutex_exit(&rset->rcs_lock);
			mutex_exit(&p->p_lock);

			mutex_enter(&pidlock);
			if ((rp = prfind(recip_pid)) == recipient) {
				/*
				 * Recipient process is still alive, but may not
				 * be in this task or project any longer.  In
				 * this case, the recipient's resource control
				 * set pertinent to this control will have
				 * changed--and we will not deliver the signal,
				 * as the recipient process is trying to tear
				 * itself off of its former set.
				 */
				mutex_enter(&rp->p_lock);
				mutex_exit(&pidlock);

				if (rctl_entity_obtain_rset(rde, rp) == rset) {
					ret |= RCT_SIGNAL;

					if (sqp == NULL)
						sigtoproc(rp, NULL,
						    recip_signal);
					else
						sigaddqa(rp, NULL, sqp);
				} else if (sqp) {
					kmem_free(sqp, sizeof (sigqueue_t));
				}
				mutex_exit(&rp->p_lock);
			} else {
				/* Recipient is gone; discard the siginfo. */
				mutex_exit(&pidlock);
				if (sqp)
					kmem_free(sqp, sizeof (sigqueue_t));
			}

			mutex_enter(&p->p_lock);
			/*
			 * Since we dropped p_lock, we may no longer be in the
			 * same task or project as we were at entry.  It is thus
			 * unsafe for us to reacquire the set lock at this
			 * point; callers of rctl_local_action() must handle
			 * this possibility.
			 */
			ret |= RCT_LK_ABANDONED;
		} else if (sqp) {
			/* Nobody to signal; discard the unused siginfo. */
			kmem_free(sqp, sizeof (sigqueue_t));
		}
	}

	/* Denial applies only when p itself is the action recipient. */
	if ((flagaction & RCTL_LOCAL_DENY) &&
	    (recipient == NULL || recipient == p)) {
		ret |= RCT_DENY;
	}

	return (ret);
}

/*
 * int rctl_action(rctl_hndl_t, rctl_set_t *, struct proc *, uint_t)
 *
 * Overview
 *   Take the action associated with the enforced value (as defined by
 *   rctl_get_enforced_value()) being exceeded or encountered.  Possibly perform
 *   a restricted subset of the available actions, if circumstances dictate that
 *   we cannot safely allocate memory (for a sigqueue_t) or guarantee process
 *   persistence across the duration of the function (an asynchronous action).
 *
 * Return values
 *   Actions taken, according to the rctl_test bitmask.
 *
 * Caller's context
 *   Safe to acquire rcs_lock.
2341 */ 2342 int 2343 rctl_action(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, uint_t safety) 2344 { 2345 return (rctl_action_entity(hndl, rset, p, NULL, safety)); 2346 } 2347 2348 int 2349 rctl_action_entity(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, 2350 rctl_entity_p_t *e, uint_t safety) 2351 { 2352 int ret = RCT_NONE; 2353 rctl_t *lrctl; 2354 rctl_entity_p_t e_tmp; 2355 2356 rctl_action_acquire: 2357 mutex_enter(&rset->rcs_lock); 2358 if (rctl_set_find(rset, hndl, &lrctl) == -1) { 2359 mutex_exit(&rset->rcs_lock); 2360 return (ret); 2361 } 2362 2363 if (e == NULL) { 2364 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2365 p, &e_tmp); 2366 e = &e_tmp; 2367 } 2368 2369 if ((ret & RCT_LK_ABANDONED) == 0) { 2370 ret |= rctl_global_action(lrctl, rset, p, lrctl->rc_cursor); 2371 2372 RCTLOP_ACTION(lrctl, p, e); 2373 2374 ret |= rctl_local_action(lrctl, rset, p, 2375 lrctl->rc_cursor, safety); 2376 2377 if (ret & RCT_LK_ABANDONED) 2378 goto rctl_action_acquire; 2379 } 2380 2381 ret &= ~RCT_LK_ABANDONED; 2382 2383 if (!(ret & RCT_DENY) && 2384 lrctl->rc_cursor->rcv_next != NULL) { 2385 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2386 2387 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2388 p, lrctl->rc_cursor->rcv_value)); 2389 2390 } 2391 mutex_exit(&rset->rcs_lock); 2392 2393 return (ret); 2394 } 2395 2396 /* 2397 * int rctl_test(rctl_hndl_t, rctl_set_t *, struct proc *, rctl_qty_t, uint_t) 2398 * 2399 * Overview 2400 * Increment the resource associated with the given handle, returning zero if 2401 * the incremented value does not exceed the threshold for the current limit 2402 * on the resource. 2403 * 2404 * Return values 2405 * Actions taken, according to the rctl_test bitmask. 2406 * 2407 * Caller's context 2408 * p_lock held by caller. 
2409 */ 2410 /*ARGSUSED*/ 2411 int 2412 rctl_test(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2413 rctl_qty_t incr, uint_t flags) 2414 { 2415 return (rctl_test_entity(rhndl, rset, p, NULL, incr, flags)); 2416 } 2417 2418 int 2419 rctl_test_entity(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2420 rctl_entity_p_t *e, rctl_qty_t incr, uint_t flags) 2421 { 2422 rctl_t *lrctl; 2423 int ret = RCT_NONE; 2424 rctl_entity_p_t e_tmp; 2425 if (p == &p0) { 2426 /* 2427 * We don't enforce rctls on the kernel itself. 2428 */ 2429 return (ret); 2430 } 2431 2432 rctl_test_acquire: 2433 ASSERT(MUTEX_HELD(&p->p_lock)); 2434 2435 mutex_enter(&rset->rcs_lock); 2436 2437 /* 2438 * Dereference from rctl_set. We don't enforce newly loaded controls 2439 * that haven't been set on this entity (since the only valid value is 2440 * the infinite system value). 2441 */ 2442 if (rctl_set_find(rset, rhndl, &lrctl) == -1) { 2443 mutex_exit(&rset->rcs_lock); 2444 return (ret); 2445 } 2446 2447 /* 2448 * This control is currently unenforced: maximal value on control 2449 * supporting infinitely available resource. 2450 */ 2451 if ((lrctl->rc_dict_entry->rcd_flagaction & RCTL_GLOBAL_INFINITE) && 2452 (lrctl->rc_cursor->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) { 2453 2454 mutex_exit(&rset->rcs_lock); 2455 return (ret); 2456 } 2457 2458 /* 2459 * If we have been called by rctl_test, look up the entity pointer 2460 * from the proc pointer. 2461 */ 2462 if (e == NULL) { 2463 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2464 p, &e_tmp); 2465 e = &e_tmp; 2466 } 2467 2468 /* 2469 * Get enforced rctl value and current usage. Test the increment 2470 * with the current usage against the enforced value--take action as 2471 * necessary. 
2472 */ 2473 while (RCTLOP_TEST(lrctl, p, e, lrctl->rc_cursor, incr, flags)) { 2474 if ((ret & RCT_LK_ABANDONED) == 0) { 2475 ret |= rctl_global_action(lrctl, rset, p, 2476 lrctl->rc_cursor); 2477 2478 RCTLOP_ACTION(lrctl, p, e); 2479 2480 ret |= rctl_local_action(lrctl, rset, p, 2481 lrctl->rc_cursor, flags); 2482 2483 if (ret & RCT_LK_ABANDONED) 2484 goto rctl_test_acquire; 2485 } 2486 2487 ret &= ~RCT_LK_ABANDONED; 2488 2489 if ((ret & RCT_DENY) == RCT_DENY || 2490 lrctl->rc_cursor->rcv_next == NULL) { 2491 ret |= RCT_DENY; 2492 break; 2493 } 2494 2495 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2496 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2497 p, lrctl->rc_cursor->rcv_value)); 2498 } 2499 2500 mutex_exit(&rset->rcs_lock); 2501 2502 return (ret); 2503 } 2504 2505 /* 2506 * void rctl_init(void) 2507 * 2508 * Overview 2509 * Initialize the rctl subsystem, including the primoridal rctls 2510 * provided by the system. New subsystem-specific rctls should _not_ be 2511 * initialized here. (Do it in your own file.) 2512 * 2513 * Return values 2514 * None. 2515 * 2516 * Caller's context 2517 * Safe for KM_SLEEP allocations. Must be called prior to any process model 2518 * initialization. 2519 */ 2520 void 2521 rctl_init(void) 2522 { 2523 rctl_cache = kmem_cache_create("rctl_cache", sizeof (rctl_t), 2524 0, NULL, NULL, NULL, NULL, NULL, 0); 2525 rctl_val_cache = kmem_cache_create("rctl_val_cache", 2526 sizeof (rctl_val_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 2527 2528 rctl_dict = mod_hash_create_extended("rctl_dict", 2529 rctl_dict_size, mod_hash_null_keydtor, rctl_dict_val_dtor, 2530 rctl_dict_hash_by_id, NULL, rctl_dict_id_cmp, KM_SLEEP); 2531 rctl_dict_by_name = mod_hash_create_strhash( 2532 "rctl_handles_by_name", rctl_dict_size, 2533 mod_hash_null_valdtor); 2534 rctl_ids = id_space_create("rctl_ids", 1, max_rctl_hndl); 2535 bzero(rctl_lists, (RC_MAX_ENTITY + 1) * sizeof (rctl_dict_entry_t *)); 2536 2537 rctlproc_init(); 2538 } 2539