1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/atomic.h> 29 #include <sys/cmn_err.h> 30 #include <sys/id_space.h> 31 #include <sys/kmem.h> 32 #include <sys/kstat.h> 33 #include <sys/log.h> 34 #include <sys/modctl.h> 35 #include <sys/modhash.h> 36 #include <sys/mutex.h> 37 #include <sys/proc.h> 38 #include <sys/procset.h> 39 #include <sys/project.h> 40 #include <sys/resource.h> 41 #include <sys/rctl.h> 42 #include <sys/siginfo.h> 43 #include <sys/strlog.h> 44 #include <sys/systm.h> 45 #include <sys/task.h> 46 #include <sys/types.h> 47 #include <sys/policy.h> 48 #include <sys/zone.h> 49 50 /* 51 * Resource controls (rctls) 52 * 53 * The rctl subsystem provides a mechanism for kernel components to 54 * register their individual resource controls with the system as a whole, 55 * such that those controls can subscribe to specific actions while being 56 * associated with the various process-model entities provided by the kernel: 57 * the process, the task, the project, and the zone. (In principle, only 58 * minor modifications would be required to connect the resource control 59 * functionality to non-process-model entities associated with the system.) 60 * 61 * Subsystems register their rctls via rctl_register(). Subsystems 62 * also wishing to provide additional limits on a given rctl can modify 63 * them once they have the rctl handle. Each subsystem should store the 64 * handle to their rctl for direct access. 65 * 66 * A primary dictionary, rctl_dict, contains a hash of id to the default 67 * control definition for each controlled resource-entity pair on the system. 68 * A secondary dictionary, rctl_dict_by_name, contains a hash of name to 69 * resource control handles. The resource control handles are distributed by 70 * the rctl_ids ID space. The handles are private and not to be 71 * advertised to userland; all userland interactions are via the rctl 72 * names. 73 * 74 * Entities inherit their rctls from their predecessor. Since projects have 75 * no ancestor, they inherit their rctls from the rctl dict for project 76 * rctls. It is expected that project controls will be set to their 77 * appropriate values shortly after project creation, presumably from a 78 * policy source such as the project database. 79 * 80 * Data structures 81 * The rctl_set_t attached to each of the process model entities is a simple 82 * hash table keyed on the rctl handle assigned at registration. The entries 83 * in the hash table are rctl_t's, whose relationship with the active control 84 * values on that resource and with the global state of the resource we 85 * illustrate below: 86 * 87 * rctl_dict[key] --> rctl_dict_entry 88 * ^ 89 * | 90 * +--+---+ 91 * rctl_set[key] ---> | rctl | --> value <-> value <-> system value --> NULL 92 * +--+---+ ^ 93 * | | 94 * +------- cursor ------+ 95 * 96 * That is, the rctl contains a back pointer to the global resource control 97 * state for this resource, which is also available in the rctl_dict hash 98 * table mentioned earlier. The rctl contains two pointers to resource 99 * control values: one, values, indicates the entire sequence of control 100 * values; the other, cursor, indicates the currently active control 101 * value--the next value to be enforced. The value list itself is an open, 102 * doubly-linked list, the last non-NULL member of which is the system value 103 * for that resource (being the theoretical/conventional maximum allowable 104 * value for the resource on this OS instance). 105 * 106 * Ops Vector 107 * Subsystems publishing rctls need not provide instances of all of the 108 * functions specified by the ops vector. In particular, if general 109 * rctl_*() entry points are not being called, certain functions can be 110 * omitted. These align as follows: 111 * 112 * rctl_set() 113 * You may wish to provide a set callback if locking circumstances prevent 114 * it or if the performance cost of requesting the enforced value from the 115 * resource control is prohibitively expensive. For instance, the currently 116 * enforced file size limit is stored on the process in the p_fsz_ctl to 117 * maintain read()/write() performance. 118 * 119 * rctl_test() 120 * You must provide a test callback if you are using the rctl_test() 121 * interface. An action callback is optional. 122 * 123 * rctl_action() 124 * You may wish to provide an action callback. 125 * 126 * Registration 127 * New resource controls can be added to a running instance by loaded modules 128 * via registration. (The current implementation does not support unloadable 129 * modules; this functionality can be added if needed, via an 130 * activation/deactivation interface involving the manipulation of the 131 * ops vector for the resource control(s) needing to support unloading.) 132 * 133 * Control value ordering 134 * Because the rctl_val chain on each rctl must be navigable in a 135 * deterministic way, we have to define an ordering on the rctl_val_t's. The 136 * defined order is (flags & [maximal], value, flags & [deny-action], 137 * privilege). 138 * 139 * Locking 140 * rctl_dict_lock must be acquired prior to rctl_lists_lock. Since 141 * rctl_dict_lock or rctl_lists_lock can be called at the enforcement point 142 * of any subsystem, holding subsystem locks, it is at all times inappropriate 143 * to call kmem_alloc(., KM_SLEEP) while holding either of these locks. 144 * Traversing any of the various resource control entity lists requires 145 * holding rctl_lists_lock. 146 * 147 * Each individual resource control set associated with an entity must have 148 * its rcs_lock held for the duration of any operations that would add 149 * resource controls or control values to the set. 150 * 151 * The locking subsequence of interest is: p_lock, rctl_dict_lock, 152 * rctl_lists_lock, entity->rcs_lock. 153 * 154 * The projects(4) database and project entity resource controls 155 * A special case is made for RCENTITY_PROJECT values set through the 156 * setproject(3PROJECT) interface. setproject() makes use of a private 157 * interface, setprojrctl(), which passes through an array of resource control 158 * blocks that need to be set while holding the entity->rcs_lock. This 159 * ensures that the act of modifying a project's resource controls is 160 * "atomic" within the kernel. 161 * 162 * Within the rctl sub-system, we provide two interfaces that are only used by 163 * the setprojrctl() code path - rctl_local_insert_all() and 164 * rctl_local_replace_all(). rctl_local_insert_all() will ensure that the 165 * resource values specified in *new_values are applied. 166 * rctl_local_replace_all() will purge the current rctl->rc_projdb and 167 * rctl->rc_values entries, and apply the *new_values. 168 * 169 * These functions modify not only the linked list of active resource controls 170 * (rctl->rc_values), but also a "cached" linked list (rctl->rc_projdb) of 171 * values set through these interfaces. To clarify: 172 * 173 * rctl->rc_values - a linked list of rctl_val_t. These are the active 174 * resource values associated with this rctl, and may have been set by 175 * setrctl() - via prctl(1M), or by setprojrctl() - via 176 * setproject(3PROJECT). 177 * 178 * rctl->rc_projdb - a linked list of rctl_val_t. These reflect the 179 * resource values set by the setprojrctl() code path. rc_projdb is not 180 * referenced by any other component of the rctl sub-system. 181 * 182 * As various locks are held when calling these functions, we ensure that all 183 * the possible memory allocations are performed prior to calling the 184 * function. *alloc_values is a linked list of uninitialized rctl_val_t, 185 * which may be used to duplicate a new resource control value (passed in as 186 * one of the members of the *new_values linked list), in order to populate 187 * rctl->rc_values. 188 */ 189 190 id_t max_rctl_hndl = 32768; 191 int rctl_dict_size = 64; 192 int rctl_set_size = 8; 193 kmutex_t rctl_dict_lock; 194 mod_hash_t *rctl_dict; 195 mod_hash_t *rctl_dict_by_name; 196 id_space_t *rctl_ids; 197 kmem_cache_t *rctl_cache; /* kmem cache for rctl structures */ 198 kmem_cache_t *rctl_val_cache; /* kmem cache for rctl values */ 199 200 kmutex_t rctl_lists_lock; 201 rctl_dict_entry_t *rctl_lists[RC_MAX_ENTITY + 1]; 202 203 /* 204 * Default resource control operations and ops vector 205 * To be used if the particular rcontrol has no specific actions defined, or 206 * if the subsystem providing the control is quiescing (in preparation for 207 * unloading, presumably.) 208 * 209 * Resource controls with callbacks should fill the unused operations with the 210 * appropriate default impotent callback. 211 */ 212 /*ARGSUSED*/ 213 void 214 rcop_no_action(struct rctl *r, struct proc *p, rctl_entity_p_t *e) 215 { 216 } 217 218 /*ARGSUSED*/ 219 rctl_qty_t 220 rcop_no_usage(struct rctl *r, struct proc *p) 221 { 222 return (0); 223 } 224 225 /*ARGSUSED*/ 226 int 227 rcop_no_set(struct rctl *r, struct proc *p, rctl_entity_p_t *e, rctl_qty_t l) 228 { 229 return (0); 230 } 231 232 /*ARGSUSED*/ 233 int 234 rcop_no_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 235 struct rctl_val *rv, rctl_qty_t i, uint_t f) 236 { 237 return (0); 238 } 239 240 rctl_ops_t rctl_default_ops = { 241 rcop_no_action, 242 rcop_no_usage, 243 rcop_no_set, 244 rcop_no_test 245 }; 246 247 /* 248 * Default "absolute" resource control operation and ops vector 249 * Useful if there is no usage associated with the 250 * resource control. 251 */ 252 /*ARGSUSED*/ 253 int 254 rcop_absolute_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 255 struct rctl_val *rv, rctl_qty_t i, uint_t f) 256 { 257 return (i > rv->rcv_value); 258 } 259 260 rctl_ops_t rctl_absolute_ops = { 261 rcop_no_action, 262 rcop_no_usage, 263 rcop_no_set, 264 rcop_absolute_test 265 }; 266 267 /*ARGSUSED*/ 268 static uint_t 269 rctl_dict_hash_by_id(void *hash_data, mod_hash_key_t key) 270 { 271 return ((uint_t)(uintptr_t)key % rctl_dict_size); 272 } 273 274 static int 275 rctl_dict_id_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 276 { 277 uint_t u1 = (uint_t)(uintptr_t)key1; 278 uint_t u2 = (uint_t)(uintptr_t)key2; 279 280 if (u1 > u2) 281 return (1); 282 283 if (u1 == u2) 284 return (0); 285 286 return (-1); 287 } 288 289 static void 290 rctl_dict_val_dtor(mod_hash_val_t val) 291 { 292 rctl_dict_entry_t *kr = (rctl_dict_entry_t *)val; 293 294 kmem_free(kr, sizeof (rctl_dict_entry_t)); 295 } 296 297 /* 298 * size_t rctl_build_name_buf() 299 * 300 * Overview 301 * rctl_build_name_buf() walks all active resource controls in the dictionary, 302 * building a buffer of continguous NUL-terminated strings. 303 * 304 * Return values 305 * The size of the buffer is returned, the passed pointer's contents are 306 * modified to that of the location of the buffer. 307 * 308 * Caller's context 309 * Caller must be in a context suitable for KM_SLEEP allocations. 310 */ 311 size_t 312 rctl_build_name_buf(char **rbufp) 313 { 314 size_t req_size, cpy_size; 315 char *rbufloc; 316 int i; 317 318 rctl_rebuild_name_buf: 319 req_size = cpy_size = 0; 320 321 /* 322 * Calculate needed buffer length. 323 */ 324 mutex_enter(&rctl_lists_lock); 325 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 326 rctl_dict_entry_t *rde; 327 328 for (rde = rctl_lists[i]; 329 rde != NULL; 330 rde = rde->rcd_next) 331 req_size += strlen(rde->rcd_name) + 1; 332 } 333 mutex_exit(&rctl_lists_lock); 334 335 rbufloc = *rbufp = kmem_alloc(req_size, KM_SLEEP); 336 337 /* 338 * Copy rctl names into our buffer. If the copy length exceeds the 339 * allocate length (due to registration changes), stop copying, free the 340 * buffer, and start again. 341 */ 342 mutex_enter(&rctl_lists_lock); 343 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 344 rctl_dict_entry_t *rde; 345 346 for (rde = rctl_lists[i]; 347 rde != NULL; 348 rde = rde->rcd_next) { 349 size_t length = strlen(rde->rcd_name) + 1; 350 351 cpy_size += length; 352 353 if (cpy_size > req_size) { 354 kmem_free(*rbufp, req_size); 355 mutex_exit(&rctl_lists_lock); 356 goto rctl_rebuild_name_buf; 357 } 358 359 bcopy(rde->rcd_name, rbufloc, length); 360 rbufloc += length; 361 } 362 } 363 mutex_exit(&rctl_lists_lock); 364 365 return (req_size); 366 } 367 368 /* 369 * rctl_dict_entry_t *rctl_dict_lookup(const char *) 370 * 371 * Overview 372 * rctl_dict_lookup() returns the resource control dictionary entry for the 373 * named resource control. 374 * 375 * Return values 376 * A pointer to the appropriate resource control dictionary entry, or NULL if 377 * no such named entry exists. 378 * 379 * Caller's context 380 * Caller must not be holding rctl_dict_lock. 381 */ 382 rctl_dict_entry_t * 383 rctl_dict_lookup(const char *name) 384 { 385 rctl_dict_entry_t *rde; 386 387 mutex_enter(&rctl_dict_lock); 388 389 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 390 (mod_hash_val_t *)&rde) == MH_ERR_NOTFOUND) { 391 mutex_exit(&rctl_dict_lock); 392 return (NULL); 393 } 394 395 mutex_exit(&rctl_dict_lock); 396 397 return (rde); 398 } 399 400 /* 401 * rctl_hndl_t rctl_hndl_lookup(const char *) 402 * 403 * Overview 404 * rctl_hndl_lookup() returns the resource control id (the "handle") for the 405 * named resource control. 406 * 407 * Return values 408 * The appropriate id, or -1 if no such named entry exists. 409 * 410 * Caller's context 411 * Caller must not be holding rctl_dict_lock. 412 */ 413 rctl_hndl_t 414 rctl_hndl_lookup(const char *name) 415 { 416 rctl_dict_entry_t *rde; 417 418 if ((rde = rctl_dict_lookup(name)) == NULL) 419 return (-1); 420 421 return (rde->rcd_id); 422 } 423 424 /* 425 * rctl_dict_entry_t * rctl_dict_lookup_hndl(rctl_hndl_t) 426 * 427 * Overview 428 * rctl_dict_lookup_hndl() completes the public lookup functions, by returning 429 * the resource control dictionary entry matching a given resource control id. 430 * 431 * Return values 432 * A pointer to the matching resource control dictionary entry, or NULL if the 433 * id does not match any existing entries. 434 * 435 * Caller's context 436 * Caller must not be holding rctl_lists_lock. 437 */ 438 rctl_dict_entry_t * 439 rctl_dict_lookup_hndl(rctl_hndl_t hndl) 440 { 441 uint_t i; 442 443 mutex_enter(&rctl_lists_lock); 444 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 445 rctl_dict_entry_t *rde; 446 447 for (rde = rctl_lists[i]; 448 rde != NULL; 449 rde = rde->rcd_next) 450 if (rde->rcd_id == hndl) { 451 mutex_exit(&rctl_lists_lock); 452 return (rde); 453 } 454 } 455 mutex_exit(&rctl_lists_lock); 456 457 return (NULL); 458 } 459 460 /* 461 * void rctl_add_default_limit(const char *name, rctl_qty_t value, 462 * rctl_priv_t privilege, uint_t action) 463 * 464 * Overview 465 * Create a default limit with specified value, privilege, and action. 466 * 467 * Return value 468 * No value returned. 469 */ 470 void 471 rctl_add_default_limit(const char *name, rctl_qty_t value, 472 rctl_priv_t privilege, uint_t action) 473 { 474 rctl_val_t *dval; 475 rctl_dict_entry_t *rde; 476 477 dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 478 bzero(dval, sizeof (rctl_val_t)); 479 dval->rcv_value = value; 480 dval->rcv_privilege = privilege; 481 dval->rcv_flagaction = action; 482 dval->rcv_action_recip_pid = -1; 483 484 rde = rctl_dict_lookup(name); 485 (void) rctl_val_list_insert(&rde->rcd_default_value, dval); 486 } 487 488 /* 489 * void rctl_add_legacy_limit(const char *name, const char *mname, 490 * const char *lname, rctl_qty_t dflt) 491 * 492 * Overview 493 * Create a default privileged limit, using the value obtained from 494 * /etc/system if it exists and is greater than the specified default 495 * value. Exists primarily for System V IPC. 496 * 497 * Return value 498 * No value returned. 499 */ 500 void 501 rctl_add_legacy_limit(const char *name, const char *mname, const char *lname, 502 rctl_qty_t dflt, rctl_qty_t max) 503 { 504 rctl_qty_t qty; 505 506 if (!mod_sysvar(mname, lname, &qty) || (qty < dflt)) 507 qty = dflt; 508 509 if (qty > max) 510 qty = max; 511 512 rctl_add_default_limit(name, qty, RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY); 513 } 514 515 static rctl_set_t * 516 rctl_entity_obtain_rset(rctl_dict_entry_t *rcd, struct proc *p) 517 { 518 rctl_set_t *rset = NULL; 519 520 if (rcd == NULL) 521 return (NULL); 522 523 switch (rcd->rcd_entity) { 524 case RCENTITY_PROCESS: 525 rset = p->p_rctls; 526 break; 527 case RCENTITY_TASK: 528 ASSERT(MUTEX_HELD(&p->p_lock)); 529 if (p->p_task != NULL) 530 rset = p->p_task->tk_rctls; 531 break; 532 case RCENTITY_PROJECT: 533 ASSERT(MUTEX_HELD(&p->p_lock)); 534 if (p->p_task != NULL && 535 p->p_task->tk_proj != NULL) 536 rset = p->p_task->tk_proj->kpj_rctls; 537 break; 538 case RCENTITY_ZONE: 539 ASSERT(MUTEX_HELD(&p->p_lock)); 540 if (p->p_zone != NULL) 541 rset = p->p_zone->zone_rctls; 542 break; 543 default: 544 panic("unknown rctl entity type %d seen", rcd->rcd_entity); 545 break; 546 } 547 548 return (rset); 549 } 550 551 static void 552 rctl_entity_obtain_entity_p(rctl_entity_t entity, struct proc *p, 553 rctl_entity_p_t *e) 554 { 555 e->rcep_p.proc = NULL; 556 e->rcep_t = entity; 557 558 switch (entity) { 559 case RCENTITY_PROCESS: 560 e->rcep_p.proc = p; 561 break; 562 case RCENTITY_TASK: 563 ASSERT(MUTEX_HELD(&p->p_lock)); 564 if (p->p_task != NULL) 565 e->rcep_p.task = p->p_task; 566 break; 567 case RCENTITY_PROJECT: 568 ASSERT(MUTEX_HELD(&p->p_lock)); 569 if (p->p_task != NULL && 570 p->p_task->tk_proj != NULL) 571 e->rcep_p.proj = p->p_task->tk_proj; 572 break; 573 case RCENTITY_ZONE: 574 ASSERT(MUTEX_HELD(&p->p_lock)); 575 if (p->p_zone != NULL) 576 e->rcep_p.zone = p->p_zone; 577 break; 578 default: 579 panic("unknown rctl entity type %d seen", entity); 580 break; 581 } 582 } 583 584 static void 585 rctl_gp_alloc(rctl_alloc_gp_t *rcgp) 586 { 587 uint_t i; 588 589 if (rcgp->rcag_nctls > 0) { 590 rctl_t *prev = kmem_cache_alloc(rctl_cache, KM_SLEEP); 591 rctl_t *rctl = prev; 592 593 rcgp->rcag_ctls = prev; 594 595 for (i = 1; i < rcgp->rcag_nctls; i++) { 596 rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 597 prev->rc_next = rctl; 598 prev = rctl; 599 } 600 601 rctl->rc_next = NULL; 602 } 603 604 if (rcgp->rcag_nvals > 0) { 605 rctl_val_t *prev = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 606 rctl_val_t *rval = prev; 607 608 rcgp->rcag_vals = prev; 609 610 for (i = 1; i < rcgp->rcag_nvals; i++) { 611 rval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 612 prev->rcv_next = rval; 613 prev = rval; 614 } 615 616 rval->rcv_next = NULL; 617 } 618 619 } 620 621 static rctl_val_t * 622 rctl_gp_detach_val(rctl_alloc_gp_t *rcgp) 623 { 624 rctl_val_t *rval = rcgp->rcag_vals; 625 626 ASSERT(rcgp->rcag_nvals > 0); 627 rcgp->rcag_nvals--; 628 rcgp->rcag_vals = rval->rcv_next; 629 630 rval->rcv_next = NULL; 631 632 return (rval); 633 } 634 635 static rctl_t * 636 rctl_gp_detach_ctl(rctl_alloc_gp_t *rcgp) 637 { 638 rctl_t *rctl = rcgp->rcag_ctls; 639 640 ASSERT(rcgp->rcag_nctls > 0); 641 rcgp->rcag_nctls--; 642 rcgp->rcag_ctls = rctl->rc_next; 643 644 rctl->rc_next = NULL; 645 646 return (rctl); 647 648 } 649 650 static void 651 rctl_gp_free(rctl_alloc_gp_t *rcgp) 652 { 653 rctl_val_t *rval = rcgp->rcag_vals; 654 rctl_t *rctl = rcgp->rcag_ctls; 655 656 while (rval != NULL) { 657 rctl_val_t *next = rval->rcv_next; 658 659 kmem_cache_free(rctl_val_cache, rval); 660 rval = next; 661 } 662 663 while (rctl != NULL) { 664 rctl_t *next = rctl->rc_next; 665 666 kmem_cache_free(rctl_cache, rctl); 667 rctl = next; 668 } 669 } 670 671 /* 672 * void rctl_prealloc_destroy(rctl_alloc_gp_t *) 673 * 674 * Overview 675 * Release all unused memory allocated via one of the "prealloc" functions: 676 * rctl_set_init_prealloc, rctl_set_dup_prealloc, or rctl_rlimit_set_prealloc. 677 * 678 * Return values 679 * None. 680 * 681 * Caller's context 682 * No restrictions on context. 683 */ 684 void 685 rctl_prealloc_destroy(rctl_alloc_gp_t *gp) 686 { 687 rctl_gp_free(gp); 688 kmem_free(gp, sizeof (rctl_alloc_gp_t)); 689 } 690 691 /* 692 * int rctl_val_cmp(rctl_val_t *, rctl_val_t *, int) 693 * 694 * Overview 695 * This function defines an ordering to rctl_val_t's in order to allow 696 * for correct placement in value lists. When the imprecise flag is set, 697 * the action recipient is ignored. This is to facilitate insert, 698 * delete, and replace operations by rctlsys. 699 * 700 * Return values 701 * 0 if the val_t's are are considered identical 702 * -1 if a is ordered lower than b 703 * 1 if a is lowered higher than b 704 * 705 * Caller's context 706 * No restrictions on context. 707 */ 708 int 709 rctl_val_cmp(rctl_val_t *a, rctl_val_t *b, int imprecise) 710 { 711 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) < 712 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 713 return (-1); 714 715 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) > 716 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 717 return (1); 718 719 if (a->rcv_value < b->rcv_value) 720 return (-1); 721 722 if (a->rcv_value > b->rcv_value) 723 return (1); 724 725 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) < 726 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 727 return (-1); 728 729 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) > 730 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 731 return (1); 732 733 if (a->rcv_privilege < b->rcv_privilege) 734 return (-1); 735 736 if (a->rcv_privilege > b->rcv_privilege) 737 return (1); 738 739 if (imprecise) 740 return (0); 741 742 if (a->rcv_action_recip_pid < b->rcv_action_recip_pid) 743 return (-1); 744 745 if (a->rcv_action_recip_pid > b->rcv_action_recip_pid) 746 return (1); 747 748 return (0); 749 } 750 751 static rctl_val_t * 752 rctl_val_list_find(rctl_val_t **head, rctl_val_t *cval) 753 { 754 rctl_val_t *rval = *head; 755 756 while (rval != NULL) { 757 if (rctl_val_cmp(cval, rval, 0) == 0) 758 return (rval); 759 760 rval = rval->rcv_next; 761 } 762 763 return (NULL); 764 765 } 766 767 /* 768 * int rctl_val_list_insert(rctl_val_t **, rctl_val_t *) 769 * 770 * Overview 771 * This function inserts the rctl_val_t into the value list provided. 772 * The insert is always successful unless if the value is a duplicate 773 * of one already in the list. 774 * 775 * Return values 776 * 1 if the value was a duplicate of an existing value in the list. 777 * 0 if the insert was successful. 778 */ 779 int 780 rctl_val_list_insert(rctl_val_t **root, rctl_val_t *rval) 781 { 782 rctl_val_t *prev; 783 int equiv; 784 785 rval->rcv_next = NULL; 786 rval->rcv_prev = NULL; 787 788 if (*root == NULL) { 789 *root = rval; 790 return (0); 791 } 792 793 equiv = rctl_val_cmp(rval, *root, 0); 794 795 if (equiv == 0) 796 return (1); 797 798 if (equiv < 0) { 799 rval->rcv_next = *root; 800 rval->rcv_next->rcv_prev = rval; 801 *root = rval; 802 803 return (0); 804 } 805 806 prev = *root; 807 while (prev->rcv_next != NULL && 808 (equiv = rctl_val_cmp(rval, prev->rcv_next, 0)) > 0) { 809 prev = prev->rcv_next; 810 } 811 812 if (equiv == 0) 813 return (1); 814 815 rval->rcv_next = prev->rcv_next; 816 if (rval->rcv_next != NULL) 817 rval->rcv_next->rcv_prev = rval; 818 prev->rcv_next = rval; 819 rval->rcv_prev = prev; 820 821 return (0); 822 } 823 824 static int 825 rctl_val_list_delete(rctl_val_t **root, rctl_val_t *rval) 826 { 827 rctl_val_t *prev; 828 829 if (*root == NULL) 830 return (-1); 831 832 prev = *root; 833 if (rctl_val_cmp(rval, prev, 0) == 0) { 834 *root = prev->rcv_next; 835 if (*root != NULL) 836 (*root)->rcv_prev = NULL; 837 838 kmem_cache_free(rctl_val_cache, prev); 839 840 return (0); 841 } 842 843 while (prev->rcv_next != NULL && 844 rctl_val_cmp(rval, prev->rcv_next, 0) != 0) { 845 prev = prev->rcv_next; 846 } 847 848 if (prev->rcv_next == NULL) { 849 /* 850 * If we navigate the entire list and cannot find a match, then 851 * return failure. 852 */ 853 return (-1); 854 } 855 856 prev = prev->rcv_next; 857 prev->rcv_prev->rcv_next = prev->rcv_next; 858 if (prev->rcv_next != NULL) 859 prev->rcv_next->rcv_prev = prev->rcv_prev; 860 861 kmem_cache_free(rctl_val_cache, prev); 862 863 return (0); 864 } 865 866 static rctl_val_t * 867 rctl_val_list_dup(rctl_val_t *rval, rctl_alloc_gp_t *ragp, struct proc *oldp, 868 struct proc *newp) 869 { 870 rctl_val_t *head = NULL; 871 872 for (; rval != NULL; rval = rval->rcv_next) { 873 rctl_val_t *dval = rctl_gp_detach_val(ragp); 874 875 bcopy(rval, dval, sizeof (rctl_val_t)); 876 dval->rcv_prev = dval->rcv_next = NULL; 877 878 if (oldp == NULL || 879 rval->rcv_action_recipient == NULL || 880 rval->rcv_action_recipient == oldp) { 881 if (rval->rcv_privilege == RCPRIV_BASIC) { 882 dval->rcv_action_recipient = newp; 883 dval->rcv_action_recip_pid = newp->p_pid; 884 } else { 885 dval->rcv_action_recipient = NULL; 886 dval->rcv_action_recip_pid = -1; 887 } 888 889 (void) rctl_val_list_insert(&head, dval); 890 } else { 891 kmem_cache_free(rctl_val_cache, dval); 892 } 893 } 894 895 return (head); 896 } 897 898 static void 899 rctl_val_list_reset(rctl_val_t *rval) 900 { 901 for (; rval != NULL; rval = rval->rcv_next) 902 rval->rcv_firing_time = 0; 903 } 904 905 static uint_t 906 rctl_val_list_count(rctl_val_t *rval) 907 { 908 uint_t n = 0; 909 910 for (; rval != NULL; rval = rval->rcv_next) 911 n++; 912 913 return (n); 914 } 915 916 917 static void 918 rctl_val_list_free(rctl_val_t *rval) 919 { 920 while (rval != NULL) { 921 rctl_val_t *next = rval->rcv_next; 922 923 kmem_cache_free(rctl_val_cache, rval); 924 925 rval = next; 926 } 927 } 928 929 /* 930 * rctl_qty_t rctl_model_maximum(rctl_dict_entry_t *, struct proc *) 931 * 932 * Overview 933 * In cases where the operating system supports more than one process 934 * addressing model, the operating system capabilities will exceed those of 935 * one or more of these models. Processes in a less capable model must have 936 * their resources accurately controlled, without diluting those of their 937 * descendants reached via exec(). rctl_model_maximum() returns the governing 938 * value for the specified process with respect to a resource control, such 939 * that the value can used for the RCTLOP_SET callback or compatability 940 * support. 941 * 942 * Return values 943 * The maximum value for the given process for the specified resource control. 944 * 945 * Caller's context 946 * No restrictions on context. 947 */ 948 rctl_qty_t 949 rctl_model_maximum(rctl_dict_entry_t *rde, struct proc *p) 950 { 951 if (p->p_model == DATAMODEL_NATIVE) 952 return (rde->rcd_max_native); 953 954 return (rde->rcd_max_ilp32); 955 } 956 957 /* 958 * rctl_qty_t rctl_model_value(rctl_dict_entry_t *, struct proc *, rctl_qty_t) 959 * 960 * Overview 961 * Convenience function wrapping the rctl_model_maximum() functionality. 962 * 963 * Return values 964 * The lesser of the process's maximum value and the given value for the 965 * specified resource control. 966 * 967 * Caller's context 968 * No restrictions on context. 969 */ 970 rctl_qty_t 971 rctl_model_value(rctl_dict_entry_t *rde, struct proc *p, rctl_qty_t value) 972 { 973 rctl_qty_t max = rctl_model_maximum(rde, p); 974 975 return (value < max ? value : max); 976 } 977 978 static void 979 rctl_set_insert(rctl_set_t *set, rctl_hndl_t hndl, rctl_t *rctl) 980 { 981 uint_t index = hndl % rctl_set_size; 982 rctl_t *next_ctl, *prev_ctl; 983 984 ASSERT(MUTEX_HELD(&set->rcs_lock)); 985 986 rctl->rc_next = NULL; 987 988 if (set->rcs_ctls[index] == NULL) { 989 set->rcs_ctls[index] = rctl; 990 return; 991 } 992 993 if (hndl < set->rcs_ctls[index]->rc_id) { 994 rctl->rc_next = set->rcs_ctls[index]; 995 set->rcs_ctls[index] = rctl; 996 997 return; 998 } 999 1000 for (next_ctl = set->rcs_ctls[index]->rc_next, 1001 prev_ctl = set->rcs_ctls[index]; 1002 next_ctl != NULL; 1003 prev_ctl = next_ctl, 1004 next_ctl = next_ctl->rc_next) { 1005 if (next_ctl->rc_id > hndl) { 1006 rctl->rc_next = next_ctl; 1007 prev_ctl->rc_next = rctl; 1008 1009 return; 1010 } 1011 } 1012 1013 rctl->rc_next = next_ctl; 1014 prev_ctl->rc_next = rctl; 1015 } 1016 1017 /* 1018 * rctl_set_t *rctl_set_create() 1019 * 1020 * Overview 1021 * Create an empty resource control set, suitable for attaching to a 1022 * controlled entity. 1023 * 1024 * Return values 1025 * A pointer to the newly created set. 1026 * 1027 * Caller's context 1028 * Safe for KM_SLEEP allocations. 1029 */ 1030 rctl_set_t * 1031 rctl_set_create() 1032 { 1033 rctl_set_t *rset = kmem_zalloc(sizeof (rctl_set_t), KM_SLEEP); 1034 1035 mutex_init(&rset->rcs_lock, NULL, MUTEX_DEFAULT, NULL); 1036 rset->rcs_ctls = kmem_zalloc(rctl_set_size * sizeof (rctl_t *), 1037 KM_SLEEP); 1038 rset->rcs_entity = -1; 1039 1040 return (rset); 1041 } 1042 1043 /* 1044 * rctl_gp_alloc_t *rctl_set_init_prealloc(rctl_entity_t) 1045 * 1046 * Overview 1047 * rctl_set_init_prealloc() examines the globally defined resource controls 1048 * and their default values and returns a resource control allocation group 1049 * populated with sufficient controls and values to form a representative 1050 * resource control set for the specified entity. 1051 * 1052 * Return values 1053 * A pointer to the newly created allocation group. 1054 * 1055 * Caller's context 1056 * Caller must be in a context suitable for KM_SLEEP allocations. 1057 */ 1058 rctl_alloc_gp_t * 1059 rctl_set_init_prealloc(rctl_entity_t entity) 1060 { 1061 rctl_dict_entry_t *rde; 1062 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1063 1064 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1065 1066 if (rctl_lists[entity] == NULL) 1067 return (ragp); 1068 1069 mutex_enter(&rctl_lists_lock); 1070 1071 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1072 ragp->rcag_nctls++; 1073 ragp->rcag_nvals += rctl_val_list_count(rde->rcd_default_value); 1074 } 1075 1076 mutex_exit(&rctl_lists_lock); 1077 1078 rctl_gp_alloc(ragp); 1079 1080 return (ragp); 1081 } 1082 1083 /* 1084 * rctl_set_t *rctl_set_init(rctl_entity_t) 1085 * 1086 * Overview 1087 * rctl_set_create() creates a resource control set, initialized with the 1088 * system infinite values on all registered controls, for attachment to a 1089 * system entity requiring resource controls, such as a process or a task. 1090 * 1091 * Return values 1092 * A pointer to the newly filled set. 1093 * 1094 * Caller's context 1095 * Caller must be holding p_lock on entry so that RCTLOP_SET() functions 1096 * may modify task and project members based on the proc structure 1097 * they are passed. 1098 */ 1099 rctl_set_t * 1100 rctl_set_init(rctl_entity_t entity, struct proc *p, rctl_entity_p_t *e, 1101 rctl_set_t *rset, rctl_alloc_gp_t *ragp) 1102 { 1103 rctl_dict_entry_t *rde; 1104 1105 ASSERT(MUTEX_HELD(&p->p_lock)); 1106 ASSERT(e); 1107 rset->rcs_entity = entity; 1108 1109 if (rctl_lists[entity] == NULL) 1110 return (rset); 1111 1112 mutex_enter(&rctl_lists_lock); 1113 mutex_enter(&rset->rcs_lock); 1114 1115 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1116 rctl_t *rctl = rctl_gp_detach_ctl(ragp); 1117 1118 rctl->rc_dict_entry = rde; 1119 rctl->rc_id = rde->rcd_id; 1120 rctl->rc_projdb = NULL; 1121 1122 rctl->rc_values = rctl_val_list_dup(rde->rcd_default_value, 1123 ragp, NULL, p); 1124 rctl->rc_cursor = rctl->rc_values; 1125 1126 ASSERT(rctl->rc_cursor != NULL); 1127 1128 rctl_set_insert(rset, rde->rcd_id, rctl); 1129 1130 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1131 rctl->rc_cursor->rcv_value)); 1132 } 1133 1134 mutex_exit(&rset->rcs_lock); 1135 mutex_exit(&rctl_lists_lock); 1136 1137 return (rset); 1138 } 1139 1140 static rctl_t * 1141 rctl_dup(rctl_t *rctl, rctl_alloc_gp_t *ragp, struct proc *oldp, 1142 struct proc *newp) 1143 { 1144 rctl_t *dup = rctl_gp_detach_ctl(ragp); 1145 rctl_val_t *dval; 1146 1147 dup->rc_id = rctl->rc_id; 1148 dup->rc_dict_entry = rctl->rc_dict_entry; 1149 dup->rc_next = NULL; 1150 dup->rc_cursor = NULL; 1151 dup->rc_values = rctl_val_list_dup(rctl->rc_values, ragp, oldp, newp); 1152 1153 for (dval = dup->rc_values; 1154 dval != NULL; dval = dval->rcv_next) { 1155 if (rctl_val_cmp(rctl->rc_cursor, dval, 0) >= 0) { 1156 dup->rc_cursor = dval; 1157 break; 1158 } 1159 } 1160 1161 if (dup->rc_cursor == NULL) 1162 dup->rc_cursor = dup->rc_values; 1163 1164 return (dup); 1165 } 1166 1167 static void 1168 rctl_set_fill_alloc_gp(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1169 { 1170 uint_t i; 1171 1172 bzero(ragp, sizeof (rctl_alloc_gp_t)); 1173 1174 for (i = 0; i < rctl_set_size; i++) { 1175 rctl_t *r = set->rcs_ctls[i]; 1176 1177 while (r != NULL) { 1178 ragp->rcag_nctls++; 1179 1180 ragp->rcag_nvals += rctl_val_list_count(r->rc_values); 1181 1182 r = r->rc_next; 1183 } 1184 } 1185 } 1186 1187 /* 1188 * rctl_alloc_gp_t *rctl_set_dup_prealloc(rctl_set_t *) 1189 * 1190 * Overview 1191 * Given a resource control set, allocate a sufficiently large allocation 1192 * group to contain a duplicate of the set. 1193 * 1194 * Return value 1195 * A pointer to the newly created allocation group. 1196 * 1197 * Caller's context 1198 * Safe for KM_SLEEP allocations. 1199 */ 1200 rctl_alloc_gp_t * 1201 rctl_set_dup_prealloc(rctl_set_t *set) 1202 { 1203 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1204 1205 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1206 1207 mutex_enter(&set->rcs_lock); 1208 rctl_set_fill_alloc_gp(set, ragp); 1209 mutex_exit(&set->rcs_lock); 1210 1211 rctl_gp_alloc(ragp); 1212 1213 return (ragp); 1214 } 1215 1216 /* 1217 * int rctl_set_dup_ready(rctl_set_t *, rctl_alloc_gp_t *) 1218 * 1219 * Overview 1220 * Verify that the allocation group provided is large enough to allow a 1221 * duplicate of the given resource control set to be constructed from its 1222 * contents. 1223 * 1224 * Return values 1225 * 1 if the allocation group is sufficiently large, 0 otherwise. 1226 * 1227 * Caller's context 1228 * rcs_lock must be held prior to entry. 1229 */ 1230 int 1231 rctl_set_dup_ready(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1232 { 1233 rctl_alloc_gp_t curr_gp; 1234 1235 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1236 1237 rctl_set_fill_alloc_gp(set, &curr_gp); 1238 1239 if (curr_gp.rcag_nctls <= ragp->rcag_nctls && 1240 curr_gp.rcag_nvals <= ragp->rcag_nvals) 1241 return (1); 1242 1243 return (0); 1244 } 1245 1246 /* 1247 * rctl_set_t *rctl_set_dup(rctl_set_t *, struct proc *, struct proc *, 1248 * rctl_set_t *, rctl_alloc_gp_t *, int) 1249 * 1250 * Overview 1251 * Make a duplicate of the resource control set. The proc pointers are those 1252 * of the owning process and of the process associated with the entity 1253 * receiving the duplicate. 1254 * 1255 * Duplication is a 3 stage process. Stage 1 is memory allocation for 1256 * the duplicate set, which is taken care of by rctl_set_dup_prealloc(). 1257 * Stage 2 consists of copying all rctls and values from the old set into 1258 * the new. Stage 3 completes the duplication by performing the appropriate 1259 * callbacks for each rctl in the new set. 1260 * 1261 * Stages 2 and 3 are handled by calling rctl_set_dup with the RCD_DUP and 1262 * RCD_CALLBACK functions, respectively. The RCD_CALLBACK flag may only 1263 * be supplied if the newp proc structure reflects the new task and 1264 * project linkage. 1265 * 1266 * Return value 1267 * A pointer to the duplicate set. 1268 * 1269 * Caller's context 1270 * The rcs_lock of the set to be duplicated must be held prior to entry. 1271 */ 1272 rctl_set_t * 1273 rctl_set_dup(rctl_set_t *set, struct proc *oldp, struct proc *newp, 1274 rctl_entity_p_t *e, rctl_set_t *dup, rctl_alloc_gp_t *ragp, int flag) 1275 { 1276 uint_t i; 1277 rctl_set_t *iter; 1278 1279 ASSERT((flag & RCD_DUP) || (flag & RCD_CALLBACK)); 1280 ASSERT(e); 1281 /* 1282 * When copying the old set, iterate over that. Otherwise, when 1283 * only callbacks have been requested, iterate over the dup set. 1284 */ 1285 if (flag & RCD_DUP) { 1286 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1287 iter = set; 1288 dup->rcs_entity = set->rcs_entity; 1289 } else { 1290 iter = dup; 1291 } 1292 1293 mutex_enter(&dup->rcs_lock); 1294 1295 for (i = 0; i < rctl_set_size; i++) { 1296 rctl_t *r = iter->rcs_ctls[i]; 1297 rctl_t *d; 1298 1299 while (r != NULL) { 1300 if (flag & RCD_DUP) { 1301 d = rctl_dup(r, ragp, oldp, newp); 1302 rctl_set_insert(dup, r->rc_id, d); 1303 } else { 1304 d = r; 1305 } 1306 1307 if (flag & RCD_CALLBACK) 1308 RCTLOP_SET(d, newp, e, 1309 rctl_model_value(d->rc_dict_entry, newp, 1310 d->rc_cursor->rcv_value)); 1311 1312 r = r->rc_next; 1313 } 1314 } 1315 1316 mutex_exit(&dup->rcs_lock); 1317 1318 return (dup); 1319 } 1320 1321 /* 1322 * void rctl_set_free(rctl_set_t *) 1323 * 1324 * Overview 1325 * Delete resource control set and all attached values. 1326 * 1327 * Return values 1328 * No value returned. 1329 * 1330 * Caller's context 1331 * No restrictions on context. 1332 */ 1333 void 1334 rctl_set_free(rctl_set_t *set) 1335 { 1336 uint_t i; 1337 1338 mutex_enter(&set->rcs_lock); 1339 for (i = 0; i < rctl_set_size; i++) { 1340 rctl_t *r = set->rcs_ctls[i]; 1341 1342 while (r != NULL) { 1343 rctl_val_t *v = r->rc_values; 1344 rctl_t *n = r->rc_next; 1345 1346 kmem_cache_free(rctl_cache, r); 1347 1348 rctl_val_list_free(v); 1349 1350 r = n; 1351 } 1352 } 1353 mutex_exit(&set->rcs_lock); 1354 1355 kmem_free(set->rcs_ctls, sizeof (rctl_t *) * rctl_set_size); 1356 kmem_free(set, sizeof (rctl_set_t)); 1357 } 1358 1359 /* 1360 * void rctl_set_reset(rctl_set_t *) 1361 * 1362 * Overview 1363 * Resets all rctls within the set such that the lowest value becomes active. 1364 * 1365 * Return values 1366 * No value returned. 1367 * 1368 * Caller's context 1369 * No restrictions on context. 1370 */ 1371 void 1372 rctl_set_reset(rctl_set_t *set, struct proc *p, rctl_entity_p_t *e) 1373 { 1374 uint_t i; 1375 1376 ASSERT(e); 1377 1378 mutex_enter(&set->rcs_lock); 1379 for (i = 0; i < rctl_set_size; i++) { 1380 rctl_t *r = set->rcs_ctls[i]; 1381 1382 while (r != NULL) { 1383 r->rc_cursor = r->rc_values; 1384 rctl_val_list_reset(r->rc_cursor); 1385 RCTLOP_SET(r, p, e, rctl_model_value(r->rc_dict_entry, 1386 p, r->rc_cursor->rcv_value)); 1387 1388 ASSERT(r->rc_cursor != NULL); 1389 1390 r = r->rc_next; 1391 } 1392 } 1393 1394 mutex_exit(&set->rcs_lock); 1395 } 1396 1397 /* 1398 * void rctl_set_tearoff(rctl_set *, struct proc *) 1399 * 1400 * Overview 1401 * Tear off any resource control values on this set with an action recipient 1402 * equal to the specified process (as they are becoming invalid with the 1403 * process's departure from this set as an observer). 1404 * 1405 * Return values 1406 * No value returned. 1407 * 1408 * Caller's context 1409 * No restrictions on context 1410 */ 1411 void 1412 rctl_set_tearoff(rctl_set_t *set, struct proc *p) 1413 { 1414 uint_t i; 1415 1416 mutex_enter(&set->rcs_lock); 1417 for (i = 0; i < rctl_set_size; i++) { 1418 rctl_t *r = set->rcs_ctls[i]; 1419 1420 while (r != NULL) { 1421 rctl_val_t *rval; 1422 1423 tearoff_rewalk_list: 1424 rval = r->rc_values; 1425 1426 while (rval != NULL) { 1427 if (rval->rcv_privilege == RCPRIV_BASIC && 1428 rval->rcv_action_recipient == p) { 1429 if (r->rc_cursor == rval) 1430 r->rc_cursor = rval->rcv_next; 1431 1432 (void) rctl_val_list_delete( 1433 &r->rc_values, rval); 1434 1435 goto tearoff_rewalk_list; 1436 } 1437 1438 rval = rval->rcv_next; 1439 } 1440 1441 ASSERT(r->rc_cursor != NULL); 1442 1443 r = r->rc_next; 1444 } 1445 } 1446 1447 mutex_exit(&set->rcs_lock); 1448 } 1449 1450 static int 1451 rctl_set_find(rctl_set_t *set, rctl_hndl_t hndl, rctl_t **rctl) 1452 { 1453 uint_t index = hndl % rctl_set_size; 1454 rctl_t *curr_ctl; 1455 1456 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1457 1458 for (curr_ctl = set->rcs_ctls[index]; curr_ctl != NULL; 1459 curr_ctl = curr_ctl->rc_next) { 1460 if (curr_ctl->rc_id == hndl) { 1461 *rctl = curr_ctl; 1462 1463 return (0); 1464 } 1465 } 1466 1467 return (-1); 1468 } 1469 1470 /* 1471 * rlim64_t rctl_enforced_value(rctl_hndl_t, rctl_set_t *, struct proc *) 1472 * 1473 * Overview 1474 * Given a process, get the next enforced value on the rctl of the specified 1475 * handle. 1476 * 1477 * Return value 1478 * The enforced value. 1479 * 1480 * Caller's context 1481 * For controls on process collectives, p->p_lock must be held across the 1482 * operation. 1483 */ 1484 /*ARGSUSED*/ 1485 rctl_qty_t 1486 rctl_enforced_value(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p) 1487 { 1488 rctl_t *rctl; 1489 rlim64_t ret; 1490 1491 mutex_enter(&rset->rcs_lock); 1492 1493 if (rctl_set_find(rset, hndl, &rctl) == -1) 1494 panic("unknown resource control handle %d requested", hndl); 1495 else 1496 ret = rctl_model_value(rctl->rc_dict_entry, p, 1497 rctl->rc_cursor->rcv_value); 1498 1499 mutex_exit(&rset->rcs_lock); 1500 1501 return (ret); 1502 } 1503 1504 /* 1505 * int rctl_global_get(const char *, rctl_dict_entry_t *) 1506 * 1507 * Overview 1508 * Copy a sanitized version of the global rctl for a given resource control 1509 * name. (By sanitization, we mean that the unsafe data pointers have been 1510 * zeroed.) 1511 * 1512 * Return value 1513 * -1 if name not defined, 0 otherwise. 1514 * 1515 * Caller's context 1516 * No restrictions on context. rctl_dict_lock must not be held. 1517 */ 1518 int 1519 rctl_global_get(const char *name, rctl_dict_entry_t *drde) 1520 { 1521 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1522 1523 if (rde == NULL) 1524 return (-1); 1525 1526 bcopy(rde, drde, sizeof (rctl_dict_entry_t)); 1527 1528 drde->rcd_next = NULL; 1529 drde->rcd_ops = NULL; 1530 1531 return (0); 1532 } 1533 1534 /* 1535 * int rctl_global_set(const char *, rctl_dict_entry_t *) 1536 * 1537 * Overview 1538 * Transfer the settable fields of the named rctl to the global rctl matching 1539 * the given resource control name. 1540 * 1541 * Return value 1542 * -1 if name not defined, 0 otherwise. 1543 * 1544 * Caller's context 1545 * No restrictions on context. rctl_dict_lock must not be held. 1546 */ 1547 int 1548 rctl_global_set(const char *name, rctl_dict_entry_t *drde) 1549 { 1550 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1551 1552 if (rde == NULL) 1553 return (-1); 1554 1555 rde->rcd_flagaction = drde->rcd_flagaction; 1556 rde->rcd_syslog_level = drde->rcd_syslog_level; 1557 rde->rcd_strlog_flags = drde->rcd_strlog_flags; 1558 1559 return (0); 1560 } 1561 1562 static int 1563 rctl_local_op(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1564 int (*cbop)(rctl_hndl_t, struct proc *p, rctl_entity_p_t *e, rctl_t *, 1565 rctl_val_t *, rctl_val_t *), struct proc *p) 1566 { 1567 rctl_t *rctl; 1568 rctl_set_t *rset; 1569 rctl_entity_p_t e; 1570 int ret = 0; 1571 rctl_dict_entry_t *rde = rctl_dict_lookup_hndl(hndl); 1572 1573 local_op_retry: 1574 1575 ASSERT(MUTEX_HELD(&p->p_lock)); 1576 1577 rset = rctl_entity_obtain_rset(rde, p); 1578 1579 if (rset == NULL) { 1580 return (-1); 1581 } 1582 rctl_entity_obtain_entity_p(rset->rcs_entity, p, &e); 1583 1584 mutex_enter(&rset->rcs_lock); 1585 1586 /* using rctl's hndl, get rctl from local set */ 1587 if (rctl_set_find(rset, hndl, &rctl) == -1) { 1588 mutex_exit(&rset->rcs_lock); 1589 return (-1); 1590 } 1591 1592 ret = cbop(hndl, p, &e, rctl, oval, nval); 1593 1594 mutex_exit(&rset->rcs_lock); 1595 return (ret); 1596 } 1597 1598 /*ARGSUSED*/ 1599 static int 1600 rctl_local_get_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1601 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1602 { 1603 if (oval == NULL) { 1604 /* 1605 * RCTL_FIRST 1606 */ 1607 bcopy(rctl->rc_values, nval, sizeof (rctl_val_t)); 1608 } else { 1609 /* 1610 * RCTL_NEXT 1611 */ 1612 rctl_val_t *tval = rctl_val_list_find(&rctl->rc_values, oval); 1613 1614 if (tval == NULL) 1615 return (ESRCH); 1616 else if (tval->rcv_next == NULL) 1617 return (ENOENT); 1618 else 1619 bcopy(tval->rcv_next, nval, sizeof (rctl_val_t)); 1620 } 1621 1622 return (0); 1623 } 1624 1625 /* 1626 * int rctl_local_get(rctl_hndl_t, rctl_val_t *) 1627 * 1628 * Overview 1629 * Get the rctl value for the given flags. 1630 * 1631 * Return values 1632 * 0 for successful get, errno otherwise. 1633 */ 1634 int 1635 rctl_local_get(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1636 struct proc *p) 1637 { 1638 return (rctl_local_op(hndl, oval, nval, rctl_local_get_cb, p)); 1639 } 1640 1641 /*ARGSUSED*/ 1642 static int 1643 rctl_local_delete_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1644 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1645 { 1646 if ((oval = rctl_val_list_find(&rctl->rc_values, nval)) == NULL) 1647 return (ESRCH); 1648 1649 if (rctl->rc_cursor == oval) { 1650 rctl->rc_cursor = oval->rcv_next; 1651 rctl_val_list_reset(rctl->rc_cursor); 1652 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1653 rctl->rc_cursor->rcv_value)); 1654 1655 ASSERT(rctl->rc_cursor != NULL); 1656 } 1657 1658 (void) rctl_val_list_delete(&rctl->rc_values, oval); 1659 1660 return (0); 1661 } 1662 1663 /* 1664 * int rctl_local_delete(rctl_hndl_t, rctl_val_t *) 1665 * 1666 * Overview 1667 * Delete the rctl value for the given flags. 1668 * 1669 * Return values 1670 * 0 for successful delete, errno otherwise. 1671 */ 1672 int 1673 rctl_local_delete(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1674 { 1675 return (rctl_local_op(hndl, NULL, val, rctl_local_delete_cb, p)); 1676 } 1677 1678 /* 1679 * rctl_local_insert_cb() 1680 * 1681 * Overview 1682 * Insert a new value into the rctl's val list. If an error occurs, 1683 * the val list must be left in the same state as when the function 1684 * was entered. 1685 * 1686 * Return Values 1687 * 0 for successful insert, EINVAL if the value is duplicated in the 1688 * existing list. 1689 */ 1690 /*ARGSUSED*/ 1691 static int 1692 rctl_local_insert_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1693 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1694 { 1695 /* 1696 * Before inserting, confirm there are no duplicates of this value 1697 * and flag level. If there is a duplicate, flag an error and do 1698 * nothing. 1699 */ 1700 if (rctl_val_list_insert(&rctl->rc_values, nval) != 0) 1701 return (EINVAL); 1702 1703 if (rctl_val_cmp(nval, rctl->rc_cursor, 0) < 0) { 1704 rctl->rc_cursor = nval; 1705 rctl_val_list_reset(rctl->rc_cursor); 1706 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1707 rctl->rc_cursor->rcv_value)); 1708 1709 ASSERT(rctl->rc_cursor != NULL); 1710 } 1711 1712 return (0); 1713 } 1714 1715 /* 1716 * int rctl_local_insert(rctl_hndl_t, rctl_val_t *) 1717 * 1718 * Overview 1719 * Insert the rctl value into the appropriate rctl set for the calling 1720 * process, given the handle. 1721 */ 1722 int 1723 rctl_local_insert(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1724 { 1725 return (rctl_local_op(hndl, NULL, val, rctl_local_insert_cb, p)); 1726 } 1727 1728 /* 1729 * rctl_local_insert_all_cb() 1730 * 1731 * Overview 1732 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1733 * 1734 * Inserts new values from the project database (new_values). alloc_values 1735 * should be a linked list of pre-allocated rctl_val_t, which are used to 1736 * populate (rc_projdb). 1737 * 1738 * Should the *new_values linked list match the contents of the rctl's 1739 * rp_projdb then we do nothing. 1740 * 1741 * Return Values 1742 * 0 is always returned. 1743 */ 1744 /*ARGSUSED*/ 1745 static int 1746 rctl_local_insert_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1747 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1748 { 1749 rctl_val_t *val; 1750 rctl_val_t *tmp_val; 1751 rctl_val_t *next; 1752 int modified = 0; 1753 1754 /* 1755 * If this the first time we've set this project rctl, then we delete 1756 * all the privilege values. These privilege values have been set by 1757 * rctl_add_default_limit(). 1758 * 1759 * We save some cycles here by not calling rctl_val_list_delete(). 1760 */ 1761 if (rctl->rc_projdb == NULL) { 1762 val = rctl->rc_values; 1763 1764 while (val != NULL) { 1765 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1766 if (val->rcv_prev != NULL) 1767 val->rcv_prev->rcv_next = val->rcv_next; 1768 else 1769 rctl->rc_values = val->rcv_next; 1770 1771 if (val->rcv_next != NULL) 1772 val->rcv_next->rcv_prev = val->rcv_prev; 1773 1774 tmp_val = val; 1775 val = val->rcv_next; 1776 kmem_cache_free(rctl_val_cache, tmp_val); 1777 } else { 1778 val = val->rcv_next; 1779 } 1780 } 1781 modified = 1; 1782 } 1783 1784 /* 1785 * Delete active values previously set through the project database. 1786 */ 1787 val = rctl->rc_projdb; 1788 1789 while (val != NULL) { 1790 1791 /* Is the old value found in the new values? */ 1792 if (rctl_val_list_find(&new_values, val) == NULL) { 1793 1794 /* 1795 * Delete from the active values if it originated from 1796 * the project database. 1797 */ 1798 if (((tmp_val = rctl_val_list_find(&rctl->rc_values, 1799 val)) != NULL) && 1800 (tmp_val->rcv_flagaction & RCTL_LOCAL_PROJDB)) { 1801 (void) rctl_val_list_delete(&rctl->rc_values, 1802 tmp_val); 1803 } 1804 1805 tmp_val = val->rcv_next; 1806 (void) rctl_val_list_delete(&rctl->rc_projdb, val); 1807 val = tmp_val; 1808 modified = 1; 1809 1810 } else 1811 val = val->rcv_next; 1812 } 1813 1814 /* 1815 * Insert new values from the project database. 1816 */ 1817 while (new_values != NULL) { 1818 next = new_values->rcv_next; 1819 1820 /* 1821 * Insert this new value into the rc_projdb, and duplicate this 1822 * entry to the active list. 1823 */ 1824 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1825 1826 tmp_val = alloc_values->rcv_next; 1827 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1828 alloc_values->rcv_next = tmp_val; 1829 1830 if (rctl_val_list_insert(&rctl->rc_values, 1831 alloc_values) == 0) { 1832 /* inserted move alloc_values on */ 1833 alloc_values = tmp_val; 1834 modified = 1; 1835 } 1836 } else { 1837 /* 1838 * Unlike setrctl() we don't want to return an error on 1839 * a duplicate entry; we are concerned solely with 1840 * ensuring that all the values specified are set. 1841 */ 1842 kmem_cache_free(rctl_val_cache, new_values); 1843 } 1844 new_values = next; 1845 } 1846 1847 /* Teardown any unused rctl_val_t */ 1848 while (alloc_values != NULL) { 1849 tmp_val = alloc_values; 1850 alloc_values = alloc_values->rcv_next; 1851 kmem_cache_free(rctl_val_cache, tmp_val); 1852 } 1853 1854 /* Reset the cursor if rctl values have been modified */ 1855 if (modified) { 1856 rctl->rc_cursor = rctl->rc_values; 1857 rctl_val_list_reset(rctl->rc_cursor); 1858 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1859 rctl->rc_cursor->rcv_value)); 1860 } 1861 1862 return (0); 1863 } 1864 1865 int 1866 rctl_local_insert_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1867 rctl_val_t *alloc_values, struct proc *p) 1868 { 1869 return (rctl_local_op(hndl, new_values, alloc_values, 1870 rctl_local_insert_all_cb, p)); 1871 } 1872 1873 /* 1874 * rctl_local_replace_all_cb() 1875 * 1876 * Overview 1877 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1878 * 1879 * Clears the active rctl values (rc_values), and stored values from the 1880 * previous insertions from the project database (rc_projdb). 1881 * 1882 * Inserts new values from the project database (new_values). alloc_values 1883 * should be a linked list of pre-allocated rctl_val_t, which are used to 1884 * populate (rc_projdb). 1885 * 1886 * Return Values 1887 * 0 is always returned. 1888 */ 1889 /*ARGSUSED*/ 1890 static int 1891 rctl_local_replace_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1892 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1893 { 1894 rctl_val_t *val; 1895 rctl_val_t *next; 1896 rctl_val_t *tmp_val; 1897 1898 /* Delete all the privilege vaules */ 1899 val = rctl->rc_values; 1900 1901 while (val != NULL) { 1902 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1903 if (val->rcv_prev != NULL) 1904 val->rcv_prev->rcv_next = val->rcv_next; 1905 else 1906 rctl->rc_values = val->rcv_next; 1907 1908 if (val->rcv_next != NULL) 1909 val->rcv_next->rcv_prev = val->rcv_prev; 1910 1911 tmp_val = val; 1912 val = val->rcv_next; 1913 kmem_cache_free(rctl_val_cache, tmp_val); 1914 } else { 1915 val = val->rcv_next; 1916 } 1917 } 1918 1919 /* Delete the contents of rc_projdb */ 1920 val = rctl->rc_projdb; 1921 while (val != NULL) { 1922 1923 tmp_val = val; 1924 val = val->rcv_next; 1925 kmem_cache_free(rctl_val_cache, tmp_val); 1926 } 1927 rctl->rc_projdb = NULL; 1928 1929 /* 1930 * Insert new values from the project database. 1931 */ 1932 while (new_values != NULL) { 1933 next = new_values->rcv_next; 1934 1935 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1936 tmp_val = alloc_values->rcv_next; 1937 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1938 alloc_values->rcv_next = tmp_val; 1939 1940 if (rctl_val_list_insert(&rctl->rc_values, 1941 alloc_values) == 0) { 1942 /* inserted, so move alloc_values on */ 1943 alloc_values = tmp_val; 1944 } 1945 } else { 1946 /* 1947 * Unlike setrctl() we don't want to return an error on 1948 * a duplicate entry; we are concerned solely with 1949 * ensuring that all the values specified are set. 1950 */ 1951 kmem_cache_free(rctl_val_cache, new_values); 1952 } 1953 1954 new_values = next; 1955 } 1956 1957 /* Teardown any unused rctl_val_t */ 1958 while (alloc_values != NULL) { 1959 tmp_val = alloc_values; 1960 alloc_values = alloc_values->rcv_next; 1961 kmem_cache_free(rctl_val_cache, tmp_val); 1962 } 1963 1964 /* Always reset the cursor */ 1965 rctl->rc_cursor = rctl->rc_values; 1966 rctl_val_list_reset(rctl->rc_cursor); 1967 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1968 rctl->rc_cursor->rcv_value)); 1969 1970 return (0); 1971 } 1972 1973 int 1974 rctl_local_replace_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1975 rctl_val_t *alloc_values, struct proc *p) 1976 { 1977 return (rctl_local_op(hndl, new_values, alloc_values, 1978 rctl_local_replace_all_cb, p)); 1979 } 1980 1981 static int 1982 rctl_local_replace_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1983 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1984 { 1985 int ret; 1986 rctl_val_t *tmp; 1987 1988 /* Verify that old will be delete-able */ 1989 tmp = rctl_val_list_find(&rctl->rc_values, oval); 1990 if (tmp == NULL) 1991 return (ESRCH); 1992 /* 1993 * Caller should verify that value being deleted is not the 1994 * system value. 1995 */ 1996 ASSERT(tmp->rcv_privilege != RCPRIV_SYSTEM); 1997 1998 /* 1999 * rctl_local_insert_cb() does the job of flagging an error 2000 * for any duplicate values. So, call rctl_local_insert_cb() 2001 * for the new value first, then do deletion of the old value. 2002 * Since this is a callback function to rctl_local_op, we can 2003 * count on rcs_lock being held at this point. This guarantees 2004 * that there is at no point a visible list which contains both 2005 * new and old values. 2006 */ 2007 if (ret = rctl_local_insert_cb(hndl, p, e, rctl, NULL, nval)) 2008 return (ret); 2009 2010 ret = rctl_local_delete_cb(hndl, p, e, rctl, NULL, oval); 2011 ASSERT(ret == 0); 2012 return (0); 2013 } 2014 2015 /* 2016 * int rctl_local_replace(rctl_hndl_t, void *, int, uint64_t *) 2017 * 2018 * Overview 2019 * Replace the rctl value with a new one. 2020 * 2021 * Return values 2022 * 0 for successful replace, errno otherwise. 2023 */ 2024 int 2025 rctl_local_replace(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 2026 struct proc *p) 2027 { 2028 return (rctl_local_op(hndl, oval, nval, rctl_local_replace_cb, p)); 2029 } 2030 2031 /* 2032 * int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *) 2033 * 2034 * Overview 2035 * To support rlimit compatibility, we need a function which takes a 64-bit 2036 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2037 * This operation is only intended for legacy rlimits. 2038 */ 2039 int 2040 rctl_rlimit_get(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64) 2041 { 2042 rctl_t *rctl; 2043 rctl_val_t *rval; 2044 rctl_set_t *rset = p->p_rctls; 2045 int soft_limit_seen = 0; 2046 int test_for_deny = 1; 2047 2048 mutex_enter(&rset->rcs_lock); 2049 if (rctl_set_find(rset, rc, &rctl) == -1) { 2050 mutex_exit(&rset->rcs_lock); 2051 return (-1); 2052 } 2053 2054 rval = rctl->rc_values; 2055 2056 if (rctl->rc_dict_entry->rcd_flagaction & (RCTL_GLOBAL_DENY_NEVER | 2057 RCTL_GLOBAL_DENY_ALWAYS)) 2058 test_for_deny = 0; 2059 2060 /* 2061 * 1. Find the first control value with the RCTL_LOCAL_DENY bit set. 2062 */ 2063 while (rval != NULL && rval->rcv_privilege != RCPRIV_SYSTEM) { 2064 if (test_for_deny && 2065 (rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0) { 2066 rval = rval->rcv_next; 2067 continue; 2068 } 2069 2070 /* 2071 * 2. If this is an RCPRIV_BASIC value, then we've found the 2072 * effective soft limit and should set rlim_cur. We should then 2073 * continue looking for another control value with the DENY bit 2074 * set. 2075 */ 2076 if (rval->rcv_privilege == RCPRIV_BASIC) { 2077 if (soft_limit_seen) { 2078 rval = rval->rcv_next; 2079 continue; 2080 } 2081 2082 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2083 rval->rcv_value < rctl_model_maximum( 2084 rctl->rc_dict_entry, p)) 2085 rlp64->rlim_cur = rval->rcv_value; 2086 else 2087 rlp64->rlim_cur = RLIM64_INFINITY; 2088 soft_limit_seen = 1; 2089 2090 rval = rval->rcv_next; 2091 continue; 2092 } 2093 2094 /* 2095 * 3. This is an RCPRIV_PRIVILEGED value. If we haven't found 2096 * a soft limit candidate, then we've found the effective hard 2097 * and soft limits and should set both If we had found a soft 2098 * limit, then this is only the hard limit and we need only set 2099 * rlim_max. 2100 */ 2101 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2102 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, 2103 p)) 2104 rlp64->rlim_max = rval->rcv_value; 2105 else 2106 rlp64->rlim_max = RLIM64_INFINITY; 2107 if (!soft_limit_seen) 2108 rlp64->rlim_cur = rlp64->rlim_max; 2109 2110 mutex_exit(&rset->rcs_lock); 2111 return (0); 2112 } 2113 2114 if (rval == NULL) { 2115 /* 2116 * This control sequence is corrupt, as it is not terminated by 2117 * a system privileged control value. 2118 */ 2119 mutex_exit(&rset->rcs_lock); 2120 return (-1); 2121 } 2122 2123 /* 2124 * 4. If we run into a RCPRIV_SYSTEM value, then the hard limit (and 2125 * the soft, if we haven't a soft candidate) should be the value of the 2126 * system control value. 2127 */ 2128 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2129 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, p)) 2130 rlp64->rlim_max = rval->rcv_value; 2131 else 2132 rlp64->rlim_max = RLIM64_INFINITY; 2133 2134 if (!soft_limit_seen) 2135 rlp64->rlim_cur = rlp64->rlim_max; 2136 2137 mutex_exit(&rset->rcs_lock); 2138 return (0); 2139 } 2140 2141 /* 2142 * rctl_alloc_gp_t *rctl_rlimit_set_prealloc(uint_t) 2143 * 2144 * Overview 2145 * Before making a series of calls to rctl_rlimit_set(), we must have a 2146 * preallocated batch of resource control values, as rctl_rlimit_set() can 2147 * potentially consume two resource control values per call. 2148 * 2149 * Return values 2150 * A populated resource control allocation group with 2n resource control 2151 * values. 2152 * 2153 * Caller's context 2154 * Must be safe for KM_SLEEP allocations. 2155 */ 2156 rctl_alloc_gp_t * 2157 rctl_rlimit_set_prealloc(uint_t n) 2158 { 2159 rctl_alloc_gp_t *gp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 2160 2161 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 2162 2163 gp->rcag_nvals = 2 * n; 2164 2165 rctl_gp_alloc(gp); 2166 2167 return (gp); 2168 } 2169 2170 /* 2171 * int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, int, 2172 * int) 2173 * 2174 * Overview 2175 * To support rlimit compatibility, we need a function which takes a 64-bit 2176 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2177 * This operation is only intended for legacy rlimits. 2178 * 2179 * The implementation of rctl_rlimit_set() is a bit clever, as it tries to 2180 * minimize the number of values placed on the value sequence in various 2181 * cases. Furthermore, we don't allow multiple identical privilege-action 2182 * values on the same sequence. (That is, we don't want a sequence like 2183 * "while (1) { rlim.rlim_cur++; setrlimit(..., rlim); }" to exhaust kernel 2184 * memory.) So we want to delete any values with the same privilege value and 2185 * action. 2186 * 2187 * Return values 2188 * 0 for successful set, errno otherwise. Errno will be either EINVAL 2189 * or EPERM, in keeping with defined errnos for ulimit() and setrlimit() 2190 * system calls. 2191 */ 2192 /*ARGSUSED*/ 2193 int 2194 rctl_rlimit_set(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64, 2195 rctl_alloc_gp_t *ragp, int flagaction, int signal, const cred_t *cr) 2196 { 2197 rctl_t *rctl; 2198 rctl_val_t *rval, *rval_priv, *rval_basic; 2199 rctl_set_t *rset = p->p_rctls; 2200 rctl_qty_t max; 2201 rctl_entity_p_t e; 2202 struct rlimit64 cur_rl; 2203 2204 e.rcep_t = RCENTITY_PROCESS; 2205 e.rcep_p.proc = p; 2206 2207 if (rlp64->rlim_cur > rlp64->rlim_max) 2208 return (EINVAL); 2209 2210 if (rctl_rlimit_get(rc, p, &cur_rl) == -1) 2211 return (EINVAL); 2212 2213 /* 2214 * If we are not privileged, we can only lower the hard limit. 2215 */ 2216 if ((rlp64->rlim_max > cur_rl.rlim_max) && 2217 cur_rl.rlim_max != RLIM64_INFINITY && 2218 secpolicy_resource(cr) != 0) 2219 return (EPERM); 2220 2221 mutex_enter(&rset->rcs_lock); 2222 2223 if (rctl_set_find(rset, rc, &rctl) == -1) { 2224 mutex_exit(&rset->rcs_lock); 2225 return (EINVAL); 2226 } 2227 2228 rval_priv = rctl_gp_detach_val(ragp); 2229 2230 rval = rctl->rc_values; 2231 2232 while (rval != NULL) { 2233 rctl_val_t *next = rval->rcv_next; 2234 2235 if (rval->rcv_privilege == RCPRIV_SYSTEM) 2236 break; 2237 2238 if ((rval->rcv_privilege == RCPRIV_BASIC) || 2239 (rval->rcv_flagaction & ~RCTL_LOCAL_ACTION_MASK) == 2240 (flagaction & ~RCTL_LOCAL_ACTION_MASK)) { 2241 if (rctl->rc_cursor == rval) { 2242 rctl->rc_cursor = rval->rcv_next; 2243 rctl_val_list_reset(rctl->rc_cursor); 2244 RCTLOP_SET(rctl, p, &e, rctl_model_value( 2245 rctl->rc_dict_entry, p, 2246 rctl->rc_cursor->rcv_value)); 2247 } 2248 (void) rctl_val_list_delete(&rctl->rc_values, rval); 2249 } 2250 2251 rval = next; 2252 } 2253 2254 rval_priv->rcv_privilege = RCPRIV_PRIVILEGED; 2255 rval_priv->rcv_flagaction = flagaction; 2256 if (rlp64->rlim_max == RLIM64_INFINITY) { 2257 rval_priv->rcv_flagaction |= RCTL_LOCAL_MAXIMAL; 2258 max = rctl->rc_dict_entry->rcd_max_native; 2259 } else { 2260 max = rlp64->rlim_max; 2261 } 2262 rval_priv->rcv_value = max; 2263 rval_priv->rcv_action_signal = signal; 2264 rval_priv->rcv_action_recipient = NULL; 2265 rval_priv->rcv_action_recip_pid = -1; 2266 rval_priv->rcv_firing_time = 0; 2267 rval_priv->rcv_prev = rval_priv->rcv_next = NULL; 2268 2269 (void) rctl_val_list_insert(&rctl->rc_values, rval_priv); 2270 rctl->rc_cursor = rval_priv; 2271 rctl_val_list_reset(rctl->rc_cursor); 2272 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 2273 rctl->rc_cursor->rcv_value)); 2274 2275 if (rlp64->rlim_cur != RLIM64_INFINITY && rlp64->rlim_cur < max) { 2276 rval_basic = rctl_gp_detach_val(ragp); 2277 2278 rval_basic->rcv_privilege = RCPRIV_BASIC; 2279 rval_basic->rcv_value = rlp64->rlim_cur; 2280 rval_basic->rcv_flagaction = flagaction; 2281 rval_basic->rcv_action_signal = signal; 2282 rval_basic->rcv_action_recipient = p; 2283 rval_basic->rcv_action_recip_pid = p->p_pid; 2284 rval_basic->rcv_firing_time = 0; 2285 rval_basic->rcv_prev = rval_basic->rcv_next = NULL; 2286 2287 (void) rctl_val_list_insert(&rctl->rc_values, rval_basic); 2288 rctl->rc_cursor = rval_basic; 2289 rctl_val_list_reset(rctl->rc_cursor); 2290 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 2291 rctl->rc_cursor->rcv_value)); 2292 } 2293 2294 ASSERT(rctl->rc_cursor != NULL); 2295 2296 mutex_exit(&rset->rcs_lock); 2297 return (0); 2298 } 2299 2300 2301 /* 2302 * rctl_hndl_t rctl_register(const char *, rctl_entity_t, int, rlim64_t, 2303 * rlim64_t, rctl_ops_t *) 2304 * 2305 * Overview 2306 * rctl_register() performs a look-up in the dictionary of rctls 2307 * active on the system; if a rctl of that name is absent, an entry is 2308 * made into the dictionary. The rctl is returned with its reference 2309 * count incremented by one. If the rctl name already exists, we panic. 2310 * (Were the resource control system to support dynamic loading and unloading, 2311 * which it is structured for, duplicate registration should lead to load 2312 * failure instead of panicking.) 2313 * 2314 * Each registered rctl has a requirement that a RCPRIV_SYSTEM limit be 2315 * defined. This limit contains the highest possible value for this quantity 2316 * on the system. Furthermore, the registered control must provide infinite 2317 * values for all applicable address space models supported by the operating 2318 * system. Attempts to set resource control values beyond the system limit 2319 * will fail. 2320 * 2321 * Return values 2322 * The rctl's ID. 2323 * 2324 * Caller's context 2325 * Caller must be in a context suitable for KM_SLEEP allocations. 2326 */ 2327 rctl_hndl_t 2328 rctl_register( 2329 const char *name, 2330 rctl_entity_t entity, 2331 int global_flags, 2332 rlim64_t max_native, 2333 rlim64_t max_ilp32, 2334 rctl_ops_t *ops) 2335 { 2336 rctl_t *rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 2337 rctl_val_t *rctl_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 2338 rctl_dict_entry_t *rctl_de = kmem_zalloc(sizeof (rctl_dict_entry_t), 2339 KM_SLEEP); 2340 rctl_t *old_rctl; 2341 rctl_hndl_t rhndl; 2342 int localflags; 2343 2344 ASSERT(ops != NULL); 2345 2346 bzero(rctl, sizeof (rctl_t)); 2347 bzero(rctl_val, sizeof (rctl_val_t)); 2348 2349 if (global_flags & RCTL_GLOBAL_DENY_NEVER) 2350 localflags = RCTL_LOCAL_MAXIMAL; 2351 else 2352 localflags = RCTL_LOCAL_MAXIMAL | RCTL_LOCAL_DENY; 2353 2354 rctl_val->rcv_privilege = RCPRIV_SYSTEM; 2355 rctl_val->rcv_value = max_native; 2356 rctl_val->rcv_flagaction = localflags; 2357 rctl_val->rcv_action_signal = 0; 2358 rctl_val->rcv_action_recipient = NULL; 2359 rctl_val->rcv_action_recip_pid = -1; 2360 rctl_val->rcv_firing_time = 0; 2361 rctl_val->rcv_next = NULL; 2362 rctl_val->rcv_prev = NULL; 2363 2364 rctl_de->rcd_name = (char *)name; 2365 rctl_de->rcd_default_value = rctl_val; 2366 rctl_de->rcd_max_native = max_native; 2367 rctl_de->rcd_max_ilp32 = max_ilp32; 2368 rctl_de->rcd_entity = entity; 2369 rctl_de->rcd_ops = ops; 2370 rctl_de->rcd_flagaction = global_flags; 2371 2372 rctl->rc_dict_entry = rctl_de; 2373 rctl->rc_values = rctl_val; 2374 2375 /* 2376 * 1. Take global lock, validate nonexistence of name, get ID. 2377 */ 2378 mutex_enter(&rctl_dict_lock); 2379 2380 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 2381 (mod_hash_val_t *)&rhndl) != MH_ERR_NOTFOUND) 2382 panic("duplicate registration of rctl %s", name); 2383 2384 rhndl = rctl_de->rcd_id = rctl->rc_id = 2385 (rctl_hndl_t)id_alloc(rctl_ids); 2386 2387 /* 2388 * 2. Insert name-entry pair in rctl_dict_by_name. 2389 */ 2390 if (mod_hash_insert(rctl_dict_by_name, (mod_hash_key_t)name, 2391 (mod_hash_val_t)rctl_de)) 2392 panic("unable to insert rctl dict entry for %s (%u)", name, 2393 (uint_t)rctl->rc_id); 2394 2395 /* 2396 * 3. Insert ID-rctl_t * pair in rctl_dict. 2397 */ 2398 if (mod_hash_find(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2399 (mod_hash_val_t *)&old_rctl) != MH_ERR_NOTFOUND) 2400 panic("duplicate rctl ID %u registered", rctl->rc_id); 2401 2402 if (mod_hash_insert(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2403 (mod_hash_val_t)rctl)) 2404 panic("unable to insert rctl %s/%u (%p)", name, 2405 (uint_t)rctl->rc_id, (void *)rctl); 2406 2407 /* 2408 * 3a. Insert rctl_dict_entry_t * in appropriate entity list. 2409 */ 2410 2411 mutex_enter(&rctl_lists_lock); 2412 2413 switch (entity) { 2414 case RCENTITY_ZONE: 2415 case RCENTITY_PROJECT: 2416 case RCENTITY_TASK: 2417 case RCENTITY_PROCESS: 2418 rctl_de->rcd_next = rctl_lists[entity]; 2419 rctl_lists[entity] = rctl_de; 2420 break; 2421 default: 2422 panic("registering unknown rctl entity %d (%s)", entity, 2423 name); 2424 break; 2425 } 2426 2427 mutex_exit(&rctl_lists_lock); 2428 2429 /* 2430 * 4. Drop lock. 2431 */ 2432 mutex_exit(&rctl_dict_lock); 2433 2434 return (rhndl); 2435 } 2436 2437 /* 2438 * static int rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, 2439 * rctl_val_t *v) 2440 * 2441 * Overview 2442 * rctl_global_action() takes, in according with the flags on the rctl_dict 2443 * entry for the given control, the appropriate actions on the exceeded 2444 * control value. Additionally, rctl_global_action() updates the firing time 2445 * on the exceeded value. 2446 * 2447 * Return values 2448 * A bitmask reflecting the actions actually taken. 2449 * 2450 * Caller's context 2451 * No restrictions on context. 2452 */ 2453 /*ARGSUSED*/ 2454 static int 2455 rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v) 2456 { 2457 rctl_dict_entry_t *rde = r->rc_dict_entry; 2458 const char *pr, *en, *idstr; 2459 id_t id; 2460 enum { 2461 SUFFIX_NONE, /* id consumed directly */ 2462 SUFFIX_NUMERIC, /* id consumed in suffix */ 2463 SUFFIX_STRING /* idstr consumed in suffix */ 2464 } suffix = SUFFIX_NONE; 2465 int ret = 0; 2466 2467 v->rcv_firing_time = gethrtime(); 2468 2469 switch (v->rcv_privilege) { 2470 case RCPRIV_BASIC: 2471 pr = "basic"; 2472 break; 2473 case RCPRIV_PRIVILEGED: 2474 pr = "privileged"; 2475 break; 2476 case RCPRIV_SYSTEM: 2477 pr = "system"; 2478 break; 2479 default: 2480 pr = "unknown"; 2481 break; 2482 } 2483 2484 switch (rde->rcd_entity) { 2485 case RCENTITY_PROCESS: 2486 en = "process"; 2487 id = p->p_pid; 2488 suffix = SUFFIX_NONE; 2489 break; 2490 case RCENTITY_TASK: 2491 en = "task"; 2492 id = p->p_task->tk_tkid; 2493 suffix = SUFFIX_NUMERIC; 2494 break; 2495 case RCENTITY_PROJECT: 2496 en = "project"; 2497 id = p->p_task->tk_proj->kpj_id; 2498 suffix = SUFFIX_NUMERIC; 2499 break; 2500 case RCENTITY_ZONE: 2501 en = "zone"; 2502 idstr = p->p_zone->zone_name; 2503 suffix = SUFFIX_STRING; 2504 break; 2505 default: 2506 en = "unknown entity associated with process"; 2507 id = p->p_pid; 2508 suffix = SUFFIX_NONE; 2509 break; 2510 } 2511 2512 if (rde->rcd_flagaction & RCTL_GLOBAL_SYSLOG) { 2513 switch (suffix) { 2514 default: 2515 case SUFFIX_NONE: 2516 (void) strlog(0, 0, 0, 2517 rde->rcd_strlog_flags | log_global.lz_active, 2518 "%s rctl %s (value %llu) exceeded by %s %d.", 2519 pr, rde->rcd_name, v->rcv_value, en, id); 2520 break; 2521 case SUFFIX_NUMERIC: 2522 (void) strlog(0, 0, 0, 2523 rde->rcd_strlog_flags | log_global.lz_active, 2524 "%s rctl %s (value %llu) exceeded by process %d" 2525 " in %s %d.", 2526 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2527 en, id); 2528 break; 2529 case SUFFIX_STRING: 2530 (void) strlog(0, 0, 0, 2531 rde->rcd_strlog_flags | log_global.lz_active, 2532 "%s rctl %s (value %llu) exceeded by process %d" 2533 " in %s %s.", 2534 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2535 en, idstr); 2536 break; 2537 } 2538 } 2539 2540 if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS) 2541 ret |= RCT_DENY; 2542 2543 return (ret); 2544 } 2545 2546 static int 2547 rctl_local_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v, 2548 uint_t safety) 2549 { 2550 int ret = 0; 2551 sigqueue_t *sqp = NULL; 2552 rctl_dict_entry_t *rde = r->rc_dict_entry; 2553 int unobservable = (rde->rcd_flagaction & RCTL_GLOBAL_UNOBSERVABLE); 2554 2555 proc_t *recipient = v->rcv_action_recipient; 2556 id_t recip_pid = v->rcv_action_recip_pid; 2557 int recip_signal = v->rcv_action_signal; 2558 uint_t flagaction = v->rcv_flagaction; 2559 2560 if (safety == RCA_UNSAFE_ALL) { 2561 if (flagaction & RCTL_LOCAL_DENY) { 2562 ret |= RCT_DENY; 2563 } 2564 return (ret); 2565 } 2566 2567 if (flagaction & RCTL_LOCAL_SIGNAL) { 2568 /* 2569 * We can build a siginfo only in the case that it is 2570 * safe for us to drop p_lock. (For asynchronous 2571 * checks this is currently not true.) 2572 */ 2573 if (safety == RCA_SAFE) { 2574 mutex_exit(&rset->rcs_lock); 2575 mutex_exit(&p->p_lock); 2576 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 2577 mutex_enter(&p->p_lock); 2578 mutex_enter(&rset->rcs_lock); 2579 2580 sqp->sq_info.si_signo = recip_signal; 2581 sqp->sq_info.si_code = SI_RCTL; 2582 sqp->sq_info.si_errno = 0; 2583 sqp->sq_info.si_entity = (int)rde->rcd_entity; 2584 } 2585 2586 if (recipient == NULL || recipient == p) { 2587 ret |= RCT_SIGNAL; 2588 2589 if (sqp == NULL) { 2590 sigtoproc(p, NULL, recip_signal); 2591 } else if (p == curproc) { 2592 /* 2593 * Then this is a synchronous test and we can 2594 * direct the signal at the violating thread. 2595 */ 2596 sigaddqa(curproc, curthread, sqp); 2597 } else { 2598 sigaddqa(p, NULL, sqp); 2599 } 2600 } else if (!unobservable) { 2601 proc_t *rp; 2602 2603 mutex_exit(&rset->rcs_lock); 2604 mutex_exit(&p->p_lock); 2605 2606 mutex_enter(&pidlock); 2607 if ((rp = prfind(recip_pid)) == recipient) { 2608 /* 2609 * Recipient process is still alive, but may not 2610 * be in this task or project any longer. In 2611 * this case, the recipient's resource control 2612 * set pertinent to this control will have 2613 * changed--and we will not deliver the signal, 2614 * as the recipient process is trying to tear 2615 * itself off of its former set. 2616 */ 2617 mutex_enter(&rp->p_lock); 2618 mutex_exit(&pidlock); 2619 2620 if (rctl_entity_obtain_rset(rde, rp) == rset) { 2621 ret |= RCT_SIGNAL; 2622 2623 if (sqp == NULL) 2624 sigtoproc(rp, NULL, 2625 recip_signal); 2626 else 2627 sigaddqa(rp, NULL, sqp); 2628 } else if (sqp) { 2629 kmem_free(sqp, sizeof (sigqueue_t)); 2630 } 2631 mutex_exit(&rp->p_lock); 2632 } else { 2633 mutex_exit(&pidlock); 2634 if (sqp) 2635 kmem_free(sqp, sizeof (sigqueue_t)); 2636 } 2637 2638 mutex_enter(&p->p_lock); 2639 /* 2640 * Since we dropped p_lock, we may no longer be in the 2641 * same task or project as we were at entry. It is thus 2642 * unsafe for us to reacquire the set lock at this 2643 * point; callers of rctl_local_action() must handle 2644 * this possibility. 2645 */ 2646 ret |= RCT_LK_ABANDONED; 2647 } else if (sqp) { 2648 kmem_free(sqp, sizeof (sigqueue_t)); 2649 } 2650 } 2651 2652 if ((flagaction & RCTL_LOCAL_DENY) && 2653 (recipient == NULL || recipient == p)) { 2654 ret |= RCT_DENY; 2655 } 2656 2657 return (ret); 2658 } 2659 2660 /* 2661 * int rctl_action(rctl_hndl_t, rctl_set_t *, struct proc *, uint_t) 2662 * 2663 * Overview 2664 * Take the action associated with the enforced value (as defined by 2665 * rctl_get_enforced_value()) being exceeded or encountered. Possibly perform 2666 * a restricted subset of the available actions, if circumstances dictate that 2667 * we cannot safely allocate memory (for a sigqueue_t) or guarantee process 2668 * persistence across the duration of the function (an asynchronous action). 2669 * 2670 * Return values 2671 * Actions taken, according to the rctl_test bitmask. 2672 * 2673 * Caller's context 2674 * Safe to acquire rcs_lock. 2675 */ 2676 int 2677 rctl_action(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, uint_t safety) 2678 { 2679 return (rctl_action_entity(hndl, rset, p, NULL, safety)); 2680 } 2681 2682 int 2683 rctl_action_entity(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, 2684 rctl_entity_p_t *e, uint_t safety) 2685 { 2686 int ret = RCT_NONE; 2687 rctl_t *lrctl; 2688 rctl_entity_p_t e_tmp; 2689 2690 rctl_action_acquire: 2691 mutex_enter(&rset->rcs_lock); 2692 if (rctl_set_find(rset, hndl, &lrctl) == -1) { 2693 mutex_exit(&rset->rcs_lock); 2694 return (ret); 2695 } 2696 2697 if (e == NULL) { 2698 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2699 p, &e_tmp); 2700 e = &e_tmp; 2701 } 2702 2703 if ((ret & RCT_LK_ABANDONED) == 0) { 2704 ret |= rctl_global_action(lrctl, rset, p, lrctl->rc_cursor); 2705 2706 RCTLOP_ACTION(lrctl, p, e); 2707 2708 ret |= rctl_local_action(lrctl, rset, p, 2709 lrctl->rc_cursor, safety); 2710 2711 if (ret & RCT_LK_ABANDONED) 2712 goto rctl_action_acquire; 2713 } 2714 2715 ret &= ~RCT_LK_ABANDONED; 2716 2717 if (!(ret & RCT_DENY) && 2718 lrctl->rc_cursor->rcv_next != NULL) { 2719 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2720 2721 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2722 p, lrctl->rc_cursor->rcv_value)); 2723 2724 } 2725 mutex_exit(&rset->rcs_lock); 2726 2727 return (ret); 2728 } 2729 2730 /* 2731 * int rctl_test(rctl_hndl_t, rctl_set_t *, struct proc *, rctl_qty_t, uint_t) 2732 * 2733 * Overview 2734 * Increment the resource associated with the given handle, returning zero if 2735 * the incremented value does not exceed the threshold for the current limit 2736 * on the resource. 2737 * 2738 * Return values 2739 * Actions taken, according to the rctl_test bitmask. 2740 * 2741 * Caller's context 2742 * p_lock held by caller. 2743 */ 2744 /*ARGSUSED*/ 2745 int 2746 rctl_test(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2747 rctl_qty_t incr, uint_t flags) 2748 { 2749 return (rctl_test_entity(rhndl, rset, p, NULL, incr, flags)); 2750 } 2751 2752 int 2753 rctl_test_entity(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2754 rctl_entity_p_t *e, rctl_qty_t incr, uint_t flags) 2755 { 2756 rctl_t *lrctl; 2757 int ret = RCT_NONE; 2758 rctl_entity_p_t e_tmp; 2759 if (p == &p0) { 2760 /* 2761 * We don't enforce rctls on the kernel itself. 2762 */ 2763 return (ret); 2764 } 2765 2766 rctl_test_acquire: 2767 ASSERT(MUTEX_HELD(&p->p_lock)); 2768 2769 mutex_enter(&rset->rcs_lock); 2770 2771 /* 2772 * Dereference from rctl_set. We don't enforce newly loaded controls 2773 * that haven't been set on this entity (since the only valid value is 2774 * the infinite system value). 2775 */ 2776 if (rctl_set_find(rset, rhndl, &lrctl) == -1) { 2777 mutex_exit(&rset->rcs_lock); 2778 return (ret); 2779 } 2780 2781 /* 2782 * This control is currently unenforced: maximal value on control 2783 * supporting infinitely available resource. 2784 */ 2785 if ((lrctl->rc_dict_entry->rcd_flagaction & RCTL_GLOBAL_INFINITE) && 2786 (lrctl->rc_cursor->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) { 2787 2788 mutex_exit(&rset->rcs_lock); 2789 return (ret); 2790 } 2791 2792 /* 2793 * If we have been called by rctl_test, look up the entity pointer 2794 * from the proc pointer. 2795 */ 2796 if (e == NULL) { 2797 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2798 p, &e_tmp); 2799 e = &e_tmp; 2800 } 2801 2802 /* 2803 * Get enforced rctl value and current usage. Test the increment 2804 * with the current usage against the enforced value--take action as 2805 * necessary. 2806 */ 2807 while (RCTLOP_TEST(lrctl, p, e, lrctl->rc_cursor, incr, flags)) { 2808 if ((ret & RCT_LK_ABANDONED) == 0) { 2809 ret |= rctl_global_action(lrctl, rset, p, 2810 lrctl->rc_cursor); 2811 2812 RCTLOP_ACTION(lrctl, p, e); 2813 2814 ret |= rctl_local_action(lrctl, rset, p, 2815 lrctl->rc_cursor, flags); 2816 2817 if (ret & RCT_LK_ABANDONED) 2818 goto rctl_test_acquire; 2819 } 2820 2821 ret &= ~RCT_LK_ABANDONED; 2822 2823 if ((ret & RCT_DENY) == RCT_DENY || 2824 lrctl->rc_cursor->rcv_next == NULL) { 2825 ret |= RCT_DENY; 2826 break; 2827 } 2828 2829 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2830 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2831 p, lrctl->rc_cursor->rcv_value)); 2832 } 2833 2834 mutex_exit(&rset->rcs_lock); 2835 2836 return (ret); 2837 } 2838 2839 /* 2840 * void rctl_init(void) 2841 * 2842 * Overview 2843 * Initialize the rctl subsystem, including the primoridal rctls 2844 * provided by the system. New subsystem-specific rctls should _not_ be 2845 * initialized here. (Do it in your own file.) 2846 * 2847 * Return values 2848 * None. 2849 * 2850 * Caller's context 2851 * Safe for KM_SLEEP allocations. Must be called prior to any process model 2852 * initialization. 2853 */ 2854 void 2855 rctl_init(void) 2856 { 2857 rctl_cache = kmem_cache_create("rctl_cache", sizeof (rctl_t), 2858 0, NULL, NULL, NULL, NULL, NULL, 0); 2859 rctl_val_cache = kmem_cache_create("rctl_val_cache", 2860 sizeof (rctl_val_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 2861 2862 rctl_dict = mod_hash_create_extended("rctl_dict", 2863 rctl_dict_size, mod_hash_null_keydtor, rctl_dict_val_dtor, 2864 rctl_dict_hash_by_id, NULL, rctl_dict_id_cmp, KM_SLEEP); 2865 rctl_dict_by_name = mod_hash_create_strhash( 2866 "rctl_handles_by_name", rctl_dict_size, 2867 mod_hash_null_valdtor); 2868 rctl_ids = id_space_create("rctl_ids", 1, max_rctl_hndl); 2869 bzero(rctl_lists, (RC_MAX_ENTITY + 1) * sizeof (rctl_dict_entry_t *)); 2870 2871 rctlproc_init(); 2872 } 2873 2874 /* 2875 * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc) 2876 * 2877 * Increments the amount of locked memory on a project, and 2878 * zone. If proj is NULL, the proj and zone of proc_t p is used. If 2879 * chargeproc is non-zero, then the charged amount is cached on p->p_locked_mem 2880 * so that the charge can be migrated when a process changes projects. 2881 * 2882 * Return values 2883 * 0 - success 2884 * EAGAIN - attempting to increment locked memory is denied by one 2885 * or more resource entities. 2886 */ 2887 int 2888 rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2889 int chargeproc) 2890 { 2891 kproject_t *projp; 2892 zone_t *zonep; 2893 rctl_entity_p_t e; 2894 int ret = 0; 2895 2896 ASSERT(p != NULL); 2897 ASSERT(MUTEX_HELD(&p->p_lock)); 2898 if (proj != NULL) { 2899 projp = proj; 2900 zonep = zone_find_by_id(projp->kpj_zoneid); 2901 } else { 2902 projp = p->p_task->tk_proj; 2903 zonep = p->p_zone; 2904 } 2905 2906 mutex_enter(&zonep->zone_mem_lock); 2907 2908 e.rcep_p.proj = projp; 2909 e.rcep_t = RCENTITY_PROJECT; 2910 if (projp->kpj_data.kpd_locked_mem + inc > 2911 projp->kpj_data.kpd_locked_mem_ctl) { 2912 if (rctl_test_entity(rc_project_locked_mem, projp->kpj_rctls, 2913 p, &e, inc, 0) & RCT_DENY) { 2914 ret = EAGAIN; 2915 goto out; 2916 } 2917 } 2918 e.rcep_p.zone = zonep; 2919 e.rcep_t = RCENTITY_ZONE; 2920 if (zonep->zone_locked_mem + inc > zonep->zone_locked_mem_ctl) { 2921 if (rctl_test_entity(rc_zone_locked_mem, zonep->zone_rctls, 2922 p, &e, inc, 0) & RCT_DENY) { 2923 ret = EAGAIN; 2924 goto out; 2925 } 2926 } 2927 2928 zonep->zone_locked_mem += inc; 2929 projp->kpj_data.kpd_locked_mem += inc; 2930 if (chargeproc != 0) { 2931 p->p_locked_mem += inc; 2932 } 2933 out: 2934 mutex_exit(&zonep->zone_mem_lock); 2935 if (proj != NULL) 2936 zone_rele(zonep); 2937 return (ret); 2938 } 2939 2940 /* 2941 * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc) 2942 * 2943 * Decrements the amount of locked memory on a project and 2944 * zone. If proj is NULL, the proj and zone of proc_t p is used. If 2945 * creditproc is non-zero, then the quantity of locked memory is subtracted 2946 * from p->p_locked_mem. 2947 * 2948 * Return values 2949 * none 2950 */ 2951 void 2952 rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2953 int creditproc) 2954 { 2955 kproject_t *projp; 2956 zone_t *zonep; 2957 2958 if (proj != NULL) { 2959 projp = proj; 2960 zonep = zone_find_by_id(projp->kpj_zoneid); 2961 } else { 2962 ASSERT(p != NULL); 2963 ASSERT(MUTEX_HELD(&p->p_lock)); 2964 projp = p->p_task->tk_proj; 2965 zonep = p->p_zone; 2966 } 2967 2968 mutex_enter(&zonep->zone_mem_lock); 2969 zonep->zone_locked_mem -= inc; 2970 projp->kpj_data.kpd_locked_mem -= inc; 2971 if (creditproc != 0) { 2972 ASSERT(p != NULL); 2973 ASSERT(MUTEX_HELD(&p->p_lock)); 2974 p->p_locked_mem -= inc; 2975 } 2976 mutex_exit(&zonep->zone_mem_lock); 2977 if (proj != NULL) 2978 zone_rele(zonep); 2979 } 2980 2981 /* 2982 * rctl_incr_swap(proc_t *, zone_t *, size_t) 2983 * 2984 * Overview 2985 * Increments the swap charge on the specified zone. 2986 * 2987 * Return values 2988 * 0 on success. EAGAIN if swap increment fails due an rctl value 2989 * on the zone. 2990 * 2991 * Callers context 2992 * p_lock held on specified proc. 2993 * swap must be even multiple of PAGESIZE 2994 */ 2995 int 2996 rctl_incr_swap(proc_t *proc, zone_t *zone, size_t swap) 2997 { 2998 rctl_entity_p_t e; 2999 3000 ASSERT(MUTEX_HELD(&proc->p_lock)); 3001 ASSERT((swap & PAGEOFFSET) == 0); 3002 e.rcep_p.zone = zone; 3003 e.rcep_t = RCENTITY_ZONE; 3004 3005 mutex_enter(&zone->zone_mem_lock); 3006 3007 if ((zone->zone_max_swap + swap) > 3008 zone->zone_max_swap_ctl) { 3009 3010 if (rctl_test_entity(rc_zone_max_swap, zone->zone_rctls, 3011 proc, &e, swap, 0) & RCT_DENY) { 3012 mutex_exit(&zone->zone_mem_lock); 3013 return (EAGAIN); 3014 } 3015 } 3016 zone->zone_max_swap += swap; 3017 mutex_exit(&zone->zone_mem_lock); 3018 return (0); 3019 } 3020 3021 /* 3022 * rctl_decr_swap(zone_t *, size_t) 3023 * 3024 * Overview 3025 * Decrements the swap charge on the specified zone. 3026 * 3027 * Return values 3028 * None 3029 * 3030 * Callers context 3031 * swap must be even multiple of PAGESIZE 3032 */ 3033 void 3034 rctl_decr_swap(zone_t *zone, size_t swap) 3035 { 3036 ASSERT((swap & PAGEOFFSET) == 0); 3037 mutex_enter(&zone->zone_mem_lock); 3038 ASSERT(zone->zone_max_swap >= swap); 3039 zone->zone_max_swap -= swap; 3040 mutex_exit(&zone->zone_mem_lock); 3041 } 3042 3043 /* 3044 * Create resource kstat 3045 */ 3046 static kstat_t * 3047 rctl_kstat_create_common(char *ks_name, int ks_instance, char *ks_class, 3048 uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags, int ks_zoneid) 3049 { 3050 kstat_t *ksp = NULL; 3051 char name[KSTAT_STRLEN]; 3052 3053 (void) snprintf(name, KSTAT_STRLEN, "%s_%d", ks_name, ks_instance); 3054 3055 if ((ksp = kstat_create_zone("caps", ks_zoneid, 3056 name, ks_class, ks_type, 3057 ks_ndata, ks_flags, ks_zoneid)) != NULL) { 3058 if (ks_zoneid != GLOBAL_ZONEID) 3059 kstat_zone_add(ksp, GLOBAL_ZONEID); 3060 } 3061 return (ksp); 3062 } 3063 3064 /* 3065 * Create zone-specific resource kstat 3066 */ 3067 kstat_t * 3068 rctl_kstat_create_zone(zone_t *zone, char *ks_name, uchar_t ks_type, 3069 uint_t ks_ndata, uchar_t ks_flags) 3070 { 3071 char name[KSTAT_STRLEN]; 3072 3073 (void) snprintf(name, KSTAT_STRLEN, "%s_zone", ks_name); 3074 3075 return (rctl_kstat_create_common(name, zone->zone_id, "zone_caps", 3076 ks_type, ks_ndata, ks_flags, zone->zone_id)); 3077 } 3078 3079 /* 3080 * Create project-specific resource kstat 3081 */ 3082 kstat_t * 3083 rctl_kstat_create_project(kproject_t *kpj, char *ks_name, uchar_t ks_type, 3084 uint_t ks_ndata, uchar_t ks_flags) 3085 { 3086 char name[KSTAT_STRLEN]; 3087 3088 (void) snprintf(name, KSTAT_STRLEN, "%s_project", ks_name); 3089 3090 return (rctl_kstat_create_common(name, kpj->kpj_id, "project_caps", 3091 ks_type, ks_ndata, ks_flags, kpj->kpj_zoneid)); 3092 } 3093