1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/atomic.h> 26 #include <sys/cmn_err.h> 27 #include <sys/id_space.h> 28 #include <sys/kmem.h> 29 #include <sys/kstat.h> 30 #include <sys/log.h> 31 #include <sys/modctl.h> 32 #include <sys/modhash.h> 33 #include <sys/mutex.h> 34 #include <sys/proc.h> 35 #include <sys/procset.h> 36 #include <sys/project.h> 37 #include <sys/resource.h> 38 #include <sys/rctl.h> 39 #include <sys/siginfo.h> 40 #include <sys/strlog.h> 41 #include <sys/systm.h> 42 #include <sys/task.h> 43 #include <sys/types.h> 44 #include <sys/policy.h> 45 #include <sys/zone.h> 46 47 /* 48 * Resource controls (rctls) 49 * 50 * The rctl subsystem provides a mechanism for kernel components to 51 * register their individual resource controls with the system as a whole, 52 * such that those controls can subscribe to specific actions while being 53 * associated with the various process-model entities provided by the kernel: 54 * the process, the task, the project, and the zone. 
(In principle, only 55 * minor modifications would be required to connect the resource control 56 * functionality to non-process-model entities associated with the system.) 57 * 58 * Subsystems register their rctls via rctl_register(). Subsystems 59 * also wishing to provide additional limits on a given rctl can modify 60 * them once they have the rctl handle. Each subsystem should store the 61 * handle to their rctl for direct access. 62 * 63 * A primary dictionary, rctl_dict, contains a hash of id to the default 64 * control definition for each controlled resource-entity pair on the system. 65 * A secondary dictionary, rctl_dict_by_name, contains a hash of name to 66 * resource control handles. The resource control handles are distributed by 67 * the rctl_ids ID space. The handles are private and not to be 68 * advertised to userland; all userland interactions are via the rctl 69 * names. 70 * 71 * Entities inherit their rctls from their predecessor. Since projects have 72 * no ancestor, they inherit their rctls from the rctl dict for project 73 * rctls. It is expected that project controls will be set to their 74 * appropriate values shortly after project creation, presumably from a 75 * policy source such as the project database. 76 * 77 * Data structures 78 * The rctl_set_t attached to each of the process model entities is a simple 79 * hash table keyed on the rctl handle assigned at registration. 
 * The entries in the hash table are rctl_t's, whose relationship with the
 * active control values on that resource and with the global state of the
 * resource we illustrate below:
 *
 *     rctl_dict[key] --> rctl_dict_entry
 *                           ^
 *                           |
 *                        +--+---+
 *     rctl_set[key] ---> | rctl | --> value <-> value <-> system value --> NULL
 *                        +--+---+                 ^
 *                           |                     |
 *                           +------- cursor ------+
 *
 *   That is, the rctl contains a back pointer to the global resource control
 *   state for this resource, which is also available in the rctl_dict hash
 *   table mentioned earlier.  The rctl contains two pointers to resource
 *   control values:  one, values, indicates the entire sequence of control
 *   values; the other, cursor, indicates the currently active control
 *   value--the next value to be enforced.  The value list itself is an open,
 *   doubly-linked list, the last non-NULL member of which is the system value
 *   for that resource (being the theoretical/conventional maximum allowable
 *   value for the resource on this OS instance).
 *
 * Ops Vector
 *   Subsystems publishing rctls need not provide instances of all of the
 *   functions specified by the ops vector.  In particular, if general
 *   rctl_*() entry points are not being called, certain functions can be
 *   omitted.  These align as follows:
 *
 *   rctl_set()
 *     You may wish to provide a set callback if locking circumstances prevent
 *     requesting the enforced value from the resource control, or if the
 *     performance cost of doing so is prohibitively expensive.  For instance,
 *     the currently enforced file size limit is stored on the process in the
 *     p_fsz_ctl to maintain read()/write() performance.
 *
 *   rctl_test()
 *     You must provide a test callback if you are using the rctl_test()
 *     interface.  An action callback is optional.
 *
 *   rctl_action()
 *     You may wish to provide an action callback.
 *
 * Registration
 *   New resource controls can be added to a running instance by loaded modules
 *   via registration.  (The current implementation does not support unloadable
 *   modules; this functionality can be added if needed, via an
 *   activation/deactivation interface involving the manipulation of the
 *   ops vector for the resource control(s) needing to support unloading.)
 *
 * Control value ordering
 *   Because the rctl_val chain on each rctl must be navigable in a
 *   deterministic way, we have to define an ordering on the rctl_val_t's.  The
 *   defined order is (flags & [maximal], value, flags & [deny-action],
 *   privilege).
 *
 * Locking
 *   rctl_dict_lock must be acquired prior to rctl_lists_lock.  Since
 *   rctl_dict_lock or rctl_lists_lock can be acquired at the enforcement point
 *   of any subsystem, while that subsystem's own locks are held, it is at all
 *   times inappropriate to call kmem_alloc(., KM_SLEEP) while holding either
 *   of these locks.  Traversing any of the various resource control entity
 *   lists requires holding rctl_lists_lock.
 *
 *   Each individual resource control set associated with an entity must have
 *   its rcs_lock held for the duration of any operations that would add
 *   resource controls or control values to the set.
 *
 *   The locking subsequence of interest is: p_lock, rctl_dict_lock,
 *   rctl_lists_lock, entity->rcs_lock.
 *
 * The project(5) database and project entity resource controls
 *   A special case is made for RCENTITY_PROJECT values set through the
 *   setproject(3PROJECT) interface.  setproject() makes use of a private
 *   interface, setprojrctl(), which passes through an array of resource control
 *   blocks that need to be set while holding the entity->rcs_lock.  This
 *   ensures that the act of modifying a project's resource controls is
 *   "atomic" within the kernel.
158 * 159 * Within the rctl sub-system, we provide two interfaces that are only used by 160 * the setprojrctl() code path - rctl_local_insert_all() and 161 * rctl_local_replace_all(). rctl_local_insert_all() will ensure that the 162 * resource values specified in *new_values are applied. 163 * rctl_local_replace_all() will purge the current rctl->rc_projdb and 164 * rctl->rc_values entries, and apply the *new_values. 165 * 166 * These functions modify not only the linked list of active resource controls 167 * (rctl->rc_values), but also a "cached" linked list (rctl->rc_projdb) of 168 * values set through these interfaces. To clarify: 169 * 170 * rctl->rc_values - a linked list of rctl_val_t. These are the active 171 * resource values associated with this rctl, and may have been set by 172 * setrctl() - via prctl(1), or by setprojrctl() - via 173 * setproject(3PROJECT). 174 * 175 * rctl->rc_projdb - a linked list of rctl_val_t. These reflect the 176 * resource values set by the setprojrctl() code path. rc_projdb is not 177 * referenced by any other component of the rctl sub-system. 178 * 179 * As various locks are held when calling these functions, we ensure that all 180 * the possible memory allocations are performed prior to calling the 181 * function. *alloc_values is a linked list of uninitialized rctl_val_t, 182 * which may be used to duplicate a new resource control value (passed in as 183 * one of the members of the *new_values linked list), in order to populate 184 * rctl->rc_values. 
*/

/*
 * Tunables and global state of the rctl subsystem.
 *
 * NOTE(review): max_rctl_hndl is presumably the upper bound of the rctl_ids
 * ID space; its consumer is not visible in this part of the file -- confirm.
 */
id_t max_rctl_hndl = 32768;
/* Modulus applied to the key by rctl_dict_hash_by_id(). */
int rctl_dict_size = 64;
/* Number of hash buckets in every rctl_set_t (see rctl_set_create()). */
int rctl_set_size = 8;
/* Held by rctl_dict_lookup() around the by-name dictionary lookup. */
kmutex_t rctl_dict_lock;
/* Primary dictionary: rctl id -> default control definition. */
mod_hash_t *rctl_dict;
/* Secondary dictionary: rctl name -> dictionary entry. */
mod_hash_t *rctl_dict_by_name;
/* ID space from which resource control handles are distributed. */
id_space_t *rctl_ids;
kmem_cache_t *rctl_cache; /* kmem cache for rctl structures */
kmem_cache_t *rctl_val_cache; /* kmem cache for rctl values */

/*
 * rctl_lists[] holds, per entity type, a list of dictionary entries linked
 * through rcd_next.  rctl_lists_lock is held while those lists are walked.
 */
kmutex_t rctl_lists_lock;
rctl_dict_entry_t *rctl_lists[RC_MAX_ENTITY + 1];

/*
 * Default resource control operations and ops vector
 *   To be used if the particular rcontrol has no specific actions defined, or
 *   if the subsystem providing the control is quiescing (in preparation for
 *   unloading, presumably.)
 *
 *   Resource controls with callbacks should fill the unused operations with the
 *   appropriate default impotent callback.
 */

/* Default action callback: does nothing. */
/*ARGSUSED*/
void
rcop_no_action(struct rctl *r, struct proc *p, rctl_entity_p_t *e)
{
}

/* Default usage callback: always reports a usage of zero. */
/*ARGSUSED*/
rctl_qty_t
rcop_no_usage(struct rctl *r, struct proc *p)
{
	return (0);
}

/* Default set callback: ignores the new value and returns 0. */
/*ARGSUSED*/
int
rcop_no_set(struct rctl *r, struct proc *p, rctl_entity_p_t *e, rctl_qty_t l)
{
	return (0);
}

/* Default test callback: always returns 0, whatever the increment. */
/*ARGSUSED*/
int
rcop_no_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e,
    struct rctl_val *rv, rctl_qty_t i, uint_t f)
{
	return (0);
}

/*
 * Ops vector made up solely of the default callbacks above, listed in the
 * order action, usage, set, test.
 */
rctl_ops_t rctl_default_ops = {
	rcop_no_action,
	rcop_no_usage,
	rcop_no_set,
	rcop_no_test
};

/*
 * Default "absolute" resource control operation and ops vector
 *   Useful if there is no usage associated with the
 *   resource control.
248 */ 249 /*ARGSUSED*/ 250 int 251 rcop_absolute_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e, 252 struct rctl_val *rv, rctl_qty_t i, uint_t f) 253 { 254 return (i > rv->rcv_value); 255 } 256 257 rctl_ops_t rctl_absolute_ops = { 258 rcop_no_action, 259 rcop_no_usage, 260 rcop_no_set, 261 rcop_absolute_test 262 }; 263 264 /*ARGSUSED*/ 265 static uint_t 266 rctl_dict_hash_by_id(void *hash_data, mod_hash_key_t key) 267 { 268 return ((uint_t)(uintptr_t)key % rctl_dict_size); 269 } 270 271 static int 272 rctl_dict_id_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 273 { 274 uint_t u1 = (uint_t)(uintptr_t)key1; 275 uint_t u2 = (uint_t)(uintptr_t)key2; 276 277 if (u1 > u2) 278 return (1); 279 280 if (u1 == u2) 281 return (0); 282 283 return (-1); 284 } 285 286 static void 287 rctl_dict_val_dtor(mod_hash_val_t val) 288 { 289 rctl_dict_entry_t *kr = (rctl_dict_entry_t *)val; 290 291 kmem_free(kr, sizeof (rctl_dict_entry_t)); 292 } 293 294 /* 295 * size_t rctl_build_name_buf() 296 * 297 * Overview 298 * rctl_build_name_buf() walks all active resource controls in the dictionary, 299 * building a buffer of continguous NUL-terminated strings. 300 * 301 * Return values 302 * The size of the buffer is returned, the passed pointer's contents are 303 * modified to that of the location of the buffer. 304 * 305 * Caller's context 306 * Caller must be in a context suitable for KM_SLEEP allocations. 307 */ 308 size_t 309 rctl_build_name_buf(char **rbufp) 310 { 311 size_t req_size, cpy_size; 312 char *rbufloc; 313 int i; 314 315 rctl_rebuild_name_buf: 316 req_size = cpy_size = 0; 317 318 /* 319 * Calculate needed buffer length. 
320 */ 321 mutex_enter(&rctl_lists_lock); 322 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 323 rctl_dict_entry_t *rde; 324 325 for (rde = rctl_lists[i]; 326 rde != NULL; 327 rde = rde->rcd_next) 328 req_size += strlen(rde->rcd_name) + 1; 329 } 330 mutex_exit(&rctl_lists_lock); 331 332 rbufloc = *rbufp = kmem_alloc(req_size, KM_SLEEP); 333 334 /* 335 * Copy rctl names into our buffer. If the copy length exceeds the 336 * allocate length (due to registration changes), stop copying, free the 337 * buffer, and start again. 338 */ 339 mutex_enter(&rctl_lists_lock); 340 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 341 rctl_dict_entry_t *rde; 342 343 for (rde = rctl_lists[i]; 344 rde != NULL; 345 rde = rde->rcd_next) { 346 size_t length = strlen(rde->rcd_name) + 1; 347 348 cpy_size += length; 349 350 if (cpy_size > req_size) { 351 kmem_free(*rbufp, req_size); 352 mutex_exit(&rctl_lists_lock); 353 goto rctl_rebuild_name_buf; 354 } 355 356 bcopy(rde->rcd_name, rbufloc, length); 357 rbufloc += length; 358 } 359 } 360 mutex_exit(&rctl_lists_lock); 361 362 return (req_size); 363 } 364 365 /* 366 * rctl_dict_entry_t *rctl_dict_lookup(const char *) 367 * 368 * Overview 369 * rctl_dict_lookup() returns the resource control dictionary entry for the 370 * named resource control. 371 * 372 * Return values 373 * A pointer to the appropriate resource control dictionary entry, or NULL if 374 * no such named entry exists. 375 * 376 * Caller's context 377 * Caller must not be holding rctl_dict_lock. 
378 */ 379 rctl_dict_entry_t * 380 rctl_dict_lookup(const char *name) 381 { 382 rctl_dict_entry_t *rde; 383 384 mutex_enter(&rctl_dict_lock); 385 386 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 387 (mod_hash_val_t *)&rde) == MH_ERR_NOTFOUND) { 388 mutex_exit(&rctl_dict_lock); 389 return (NULL); 390 } 391 392 mutex_exit(&rctl_dict_lock); 393 394 return (rde); 395 } 396 397 /* 398 * rctl_hndl_t rctl_hndl_lookup(const char *) 399 * 400 * Overview 401 * rctl_hndl_lookup() returns the resource control id (the "handle") for the 402 * named resource control. 403 * 404 * Return values 405 * The appropriate id, or -1 if no such named entry exists. 406 * 407 * Caller's context 408 * Caller must not be holding rctl_dict_lock. 409 */ 410 rctl_hndl_t 411 rctl_hndl_lookup(const char *name) 412 { 413 rctl_dict_entry_t *rde; 414 415 if ((rde = rctl_dict_lookup(name)) == NULL) 416 return (-1); 417 418 return (rde->rcd_id); 419 } 420 421 /* 422 * rctl_dict_entry_t * rctl_dict_lookup_hndl(rctl_hndl_t) 423 * 424 * Overview 425 * rctl_dict_lookup_hndl() completes the public lookup functions, by returning 426 * the resource control dictionary entry matching a given resource control id. 427 * 428 * Return values 429 * A pointer to the matching resource control dictionary entry, or NULL if the 430 * id does not match any existing entries. 431 * 432 * Caller's context 433 * Caller must not be holding rctl_lists_lock. 
434 */ 435 rctl_dict_entry_t * 436 rctl_dict_lookup_hndl(rctl_hndl_t hndl) 437 { 438 uint_t i; 439 440 mutex_enter(&rctl_lists_lock); 441 for (i = 0; i < RC_MAX_ENTITY + 1; i++) { 442 rctl_dict_entry_t *rde; 443 444 for (rde = rctl_lists[i]; 445 rde != NULL; 446 rde = rde->rcd_next) 447 if (rde->rcd_id == hndl) { 448 mutex_exit(&rctl_lists_lock); 449 return (rde); 450 } 451 } 452 mutex_exit(&rctl_lists_lock); 453 454 return (NULL); 455 } 456 457 /* 458 * void rctl_add_default_limit(const char *name, rctl_qty_t value, 459 * rctl_priv_t privilege, uint_t action) 460 * 461 * Overview 462 * Create a default limit with specified value, privilege, and action. 463 * 464 * Return value 465 * No value returned. 466 */ 467 void 468 rctl_add_default_limit(const char *name, rctl_qty_t value, 469 rctl_priv_t privilege, uint_t action) 470 { 471 rctl_val_t *dval; 472 rctl_dict_entry_t *rde; 473 474 dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 475 bzero(dval, sizeof (rctl_val_t)); 476 dval->rcv_value = value; 477 dval->rcv_privilege = privilege; 478 dval->rcv_flagaction = action; 479 dval->rcv_action_recip_pid = -1; 480 481 rde = rctl_dict_lookup(name); 482 (void) rctl_val_list_insert(&rde->rcd_default_value, dval); 483 } 484 485 /* 486 * void rctl_add_legacy_limit(const char *name, const char *mname, 487 * const char *lname, rctl_qty_t dflt) 488 * 489 * Overview 490 * Create a default privileged limit, using the value obtained from 491 * /etc/system if it exists and is greater than the specified default 492 * value. Exists primarily for System V IPC. 493 * 494 * Return value 495 * No value returned. 
496 */ 497 void 498 rctl_add_legacy_limit(const char *name, const char *mname, const char *lname, 499 rctl_qty_t dflt, rctl_qty_t max) 500 { 501 rctl_qty_t qty; 502 503 if (!mod_sysvar(mname, lname, &qty) || (qty < dflt)) 504 qty = dflt; 505 506 if (qty > max) 507 qty = max; 508 509 rctl_add_default_limit(name, qty, RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY); 510 } 511 512 rctl_set_t * 513 rctl_entity_obtain_rset(rctl_dict_entry_t *rcd, struct proc *p) 514 { 515 rctl_set_t *rset = NULL; 516 517 if (rcd == NULL) 518 return (NULL); 519 520 switch (rcd->rcd_entity) { 521 case RCENTITY_PROCESS: 522 rset = p->p_rctls; 523 break; 524 case RCENTITY_TASK: 525 ASSERT(MUTEX_HELD(&p->p_lock)); 526 if (p->p_task != NULL) 527 rset = p->p_task->tk_rctls; 528 break; 529 case RCENTITY_PROJECT: 530 ASSERT(MUTEX_HELD(&p->p_lock)); 531 if (p->p_task != NULL && 532 p->p_task->tk_proj != NULL) 533 rset = p->p_task->tk_proj->kpj_rctls; 534 break; 535 case RCENTITY_ZONE: 536 ASSERT(MUTEX_HELD(&p->p_lock)); 537 if (p->p_zone != NULL) 538 rset = p->p_zone->zone_rctls; 539 break; 540 default: 541 panic("unknown rctl entity type %d seen", rcd->rcd_entity); 542 break; 543 } 544 545 return (rset); 546 } 547 548 static void 549 rctl_entity_obtain_entity_p(rctl_entity_t entity, struct proc *p, 550 rctl_entity_p_t *e) 551 { 552 e->rcep_p.proc = NULL; 553 e->rcep_t = entity; 554 555 switch (entity) { 556 case RCENTITY_PROCESS: 557 e->rcep_p.proc = p; 558 break; 559 case RCENTITY_TASK: 560 ASSERT(MUTEX_HELD(&p->p_lock)); 561 if (p->p_task != NULL) 562 e->rcep_p.task = p->p_task; 563 break; 564 case RCENTITY_PROJECT: 565 ASSERT(MUTEX_HELD(&p->p_lock)); 566 if (p->p_task != NULL && 567 p->p_task->tk_proj != NULL) 568 e->rcep_p.proj = p->p_task->tk_proj; 569 break; 570 case RCENTITY_ZONE: 571 ASSERT(MUTEX_HELD(&p->p_lock)); 572 if (p->p_zone != NULL) 573 e->rcep_p.zone = p->p_zone; 574 break; 575 default: 576 panic("unknown rctl entity type %d seen", entity); 577 break; 578 } 579 } 580 581 static void 582 
rctl_gp_alloc(rctl_alloc_gp_t *rcgp) 583 { 584 uint_t i; 585 586 if (rcgp->rcag_nctls > 0) { 587 rctl_t *prev = kmem_cache_alloc(rctl_cache, KM_SLEEP); 588 rctl_t *rctl = prev; 589 590 rcgp->rcag_ctls = prev; 591 592 for (i = 1; i < rcgp->rcag_nctls; i++) { 593 rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 594 prev->rc_next = rctl; 595 prev = rctl; 596 } 597 598 rctl->rc_next = NULL; 599 } 600 601 if (rcgp->rcag_nvals > 0) { 602 rctl_val_t *prev = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 603 rctl_val_t *rval = prev; 604 605 rcgp->rcag_vals = prev; 606 607 for (i = 1; i < rcgp->rcag_nvals; i++) { 608 rval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 609 prev->rcv_next = rval; 610 prev = rval; 611 } 612 613 rval->rcv_next = NULL; 614 } 615 616 } 617 618 static rctl_val_t * 619 rctl_gp_detach_val(rctl_alloc_gp_t *rcgp) 620 { 621 rctl_val_t *rval = rcgp->rcag_vals; 622 623 ASSERT(rcgp->rcag_nvals > 0); 624 rcgp->rcag_nvals--; 625 rcgp->rcag_vals = rval->rcv_next; 626 627 rval->rcv_next = NULL; 628 629 return (rval); 630 } 631 632 static rctl_t * 633 rctl_gp_detach_ctl(rctl_alloc_gp_t *rcgp) 634 { 635 rctl_t *rctl = rcgp->rcag_ctls; 636 637 ASSERT(rcgp->rcag_nctls > 0); 638 rcgp->rcag_nctls--; 639 rcgp->rcag_ctls = rctl->rc_next; 640 641 rctl->rc_next = NULL; 642 643 return (rctl); 644 645 } 646 647 static void 648 rctl_gp_free(rctl_alloc_gp_t *rcgp) 649 { 650 rctl_val_t *rval = rcgp->rcag_vals; 651 rctl_t *rctl = rcgp->rcag_ctls; 652 653 while (rval != NULL) { 654 rctl_val_t *next = rval->rcv_next; 655 656 kmem_cache_free(rctl_val_cache, rval); 657 rval = next; 658 } 659 660 while (rctl != NULL) { 661 rctl_t *next = rctl->rc_next; 662 663 kmem_cache_free(rctl_cache, rctl); 664 rctl = next; 665 } 666 } 667 668 /* 669 * void rctl_prealloc_destroy(rctl_alloc_gp_t *) 670 * 671 * Overview 672 * Release all unused memory allocated via one of the "prealloc" functions: 673 * rctl_set_init_prealloc, rctl_set_dup_prealloc, or rctl_rlimit_set_prealloc. 
674 * 675 * Return values 676 * None. 677 * 678 * Caller's context 679 * No restrictions on context. 680 */ 681 void 682 rctl_prealloc_destroy(rctl_alloc_gp_t *gp) 683 { 684 rctl_gp_free(gp); 685 kmem_free(gp, sizeof (rctl_alloc_gp_t)); 686 } 687 688 /* 689 * int rctl_val_cmp(rctl_val_t *, rctl_val_t *, int) 690 * 691 * Overview 692 * This function defines an ordering to rctl_val_t's in order to allow 693 * for correct placement in value lists. When the imprecise flag is set, 694 * the action recipient is ignored. This is to facilitate insert, 695 * delete, and replace operations by rctlsys. 696 * 697 * Return values 698 * 0 if the val_t's are are considered identical 699 * -1 if a is ordered lower than b 700 * 1 if a is lowered higher than b 701 * 702 * Caller's context 703 * No restrictions on context. 704 */ 705 int 706 rctl_val_cmp(rctl_val_t *a, rctl_val_t *b, int imprecise) 707 { 708 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) < 709 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 710 return (-1); 711 712 if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) > 713 (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) 714 return (1); 715 716 if (a->rcv_value < b->rcv_value) 717 return (-1); 718 719 if (a->rcv_value > b->rcv_value) 720 return (1); 721 722 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) < 723 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 724 return (-1); 725 726 if ((a->rcv_flagaction & RCTL_LOCAL_DENY) > 727 (b->rcv_flagaction & RCTL_LOCAL_DENY)) 728 return (1); 729 730 if (a->rcv_privilege < b->rcv_privilege) 731 return (-1); 732 733 if (a->rcv_privilege > b->rcv_privilege) 734 return (1); 735 736 if (imprecise) 737 return (0); 738 739 if (a->rcv_action_recip_pid < b->rcv_action_recip_pid) 740 return (-1); 741 742 if (a->rcv_action_recip_pid > b->rcv_action_recip_pid) 743 return (1); 744 745 return (0); 746 } 747 748 static rctl_val_t * 749 rctl_val_list_find(rctl_val_t **head, rctl_val_t *cval) 750 { 751 rctl_val_t *rval = *head; 752 753 while (rval != NULL) { 754 if 
(rctl_val_cmp(cval, rval, 0) == 0) 755 return (rval); 756 757 rval = rval->rcv_next; 758 } 759 760 return (NULL); 761 762 } 763 764 /* 765 * int rctl_val_list_insert(rctl_val_t **, rctl_val_t *) 766 * 767 * Overview 768 * This function inserts the rctl_val_t into the value list provided. 769 * The insert is always successful unless if the value is a duplicate 770 * of one already in the list. 771 * 772 * Return values 773 * 1 if the value was a duplicate of an existing value in the list. 774 * 0 if the insert was successful. 775 */ 776 int 777 rctl_val_list_insert(rctl_val_t **root, rctl_val_t *rval) 778 { 779 rctl_val_t *prev; 780 int equiv; 781 782 rval->rcv_next = NULL; 783 rval->rcv_prev = NULL; 784 785 if (*root == NULL) { 786 *root = rval; 787 return (0); 788 } 789 790 equiv = rctl_val_cmp(rval, *root, 0); 791 792 if (equiv == 0) 793 return (1); 794 795 if (equiv < 0) { 796 rval->rcv_next = *root; 797 rval->rcv_next->rcv_prev = rval; 798 *root = rval; 799 800 return (0); 801 } 802 803 prev = *root; 804 while (prev->rcv_next != NULL && 805 (equiv = rctl_val_cmp(rval, prev->rcv_next, 0)) > 0) { 806 prev = prev->rcv_next; 807 } 808 809 if (equiv == 0) 810 return (1); 811 812 rval->rcv_next = prev->rcv_next; 813 if (rval->rcv_next != NULL) 814 rval->rcv_next->rcv_prev = rval; 815 prev->rcv_next = rval; 816 rval->rcv_prev = prev; 817 818 return (0); 819 } 820 821 static int 822 rctl_val_list_delete(rctl_val_t **root, rctl_val_t *rval) 823 { 824 rctl_val_t *prev; 825 826 if (*root == NULL) 827 return (-1); 828 829 prev = *root; 830 if (rctl_val_cmp(rval, prev, 0) == 0) { 831 *root = prev->rcv_next; 832 if (*root != NULL) 833 (*root)->rcv_prev = NULL; 834 835 kmem_cache_free(rctl_val_cache, prev); 836 837 return (0); 838 } 839 840 while (prev->rcv_next != NULL && 841 rctl_val_cmp(rval, prev->rcv_next, 0) != 0) { 842 prev = prev->rcv_next; 843 } 844 845 if (prev->rcv_next == NULL) { 846 /* 847 * If we navigate the entire list and cannot find a match, then 848 * 
return failure. 849 */ 850 return (-1); 851 } 852 853 prev = prev->rcv_next; 854 prev->rcv_prev->rcv_next = prev->rcv_next; 855 if (prev->rcv_next != NULL) 856 prev->rcv_next->rcv_prev = prev->rcv_prev; 857 858 kmem_cache_free(rctl_val_cache, prev); 859 860 return (0); 861 } 862 863 static rctl_val_t * 864 rctl_val_list_dup(rctl_val_t *rval, rctl_alloc_gp_t *ragp, struct proc *oldp, 865 struct proc *newp) 866 { 867 rctl_val_t *head = NULL; 868 869 for (; rval != NULL; rval = rval->rcv_next) { 870 rctl_val_t *dval = rctl_gp_detach_val(ragp); 871 872 bcopy(rval, dval, sizeof (rctl_val_t)); 873 dval->rcv_prev = dval->rcv_next = NULL; 874 875 if (oldp == NULL || 876 rval->rcv_action_recipient == NULL || 877 rval->rcv_action_recipient == oldp) { 878 if (rval->rcv_privilege == RCPRIV_BASIC) { 879 dval->rcv_action_recipient = newp; 880 dval->rcv_action_recip_pid = newp->p_pid; 881 } else { 882 dval->rcv_action_recipient = NULL; 883 dval->rcv_action_recip_pid = -1; 884 } 885 886 (void) rctl_val_list_insert(&head, dval); 887 } else { 888 kmem_cache_free(rctl_val_cache, dval); 889 } 890 } 891 892 return (head); 893 } 894 895 static void 896 rctl_val_list_reset(rctl_val_t *rval) 897 { 898 for (; rval != NULL; rval = rval->rcv_next) 899 rval->rcv_firing_time = 0; 900 } 901 902 static uint_t 903 rctl_val_list_count(rctl_val_t *rval) 904 { 905 uint_t n = 0; 906 907 for (; rval != NULL; rval = rval->rcv_next) 908 n++; 909 910 return (n); 911 } 912 913 914 static void 915 rctl_val_list_free(rctl_val_t *rval) 916 { 917 while (rval != NULL) { 918 rctl_val_t *next = rval->rcv_next; 919 920 kmem_cache_free(rctl_val_cache, rval); 921 922 rval = next; 923 } 924 } 925 926 /* 927 * rctl_qty_t rctl_model_maximum(rctl_dict_entry_t *, struct proc *) 928 * 929 * Overview 930 * In cases where the operating system supports more than one process 931 * addressing model, the operating system capabilities will exceed those of 932 * one or more of these models. 
Processes in a less capable model must have 933 * their resources accurately controlled, without diluting those of their 934 * descendants reached via exec(). rctl_model_maximum() returns the governing 935 * value for the specified process with respect to a resource control, such 936 * that the value can used for the RCTLOP_SET callback or compatability 937 * support. 938 * 939 * Return values 940 * The maximum value for the given process for the specified resource control. 941 * 942 * Caller's context 943 * No restrictions on context. 944 */ 945 rctl_qty_t 946 rctl_model_maximum(rctl_dict_entry_t *rde, struct proc *p) 947 { 948 if (p->p_model == DATAMODEL_NATIVE) 949 return (rde->rcd_max_native); 950 951 return (rde->rcd_max_ilp32); 952 } 953 954 /* 955 * rctl_qty_t rctl_model_value(rctl_dict_entry_t *, struct proc *, rctl_qty_t) 956 * 957 * Overview 958 * Convenience function wrapping the rctl_model_maximum() functionality. 959 * 960 * Return values 961 * The lesser of the process's maximum value and the given value for the 962 * specified resource control. 963 * 964 * Caller's context 965 * No restrictions on context. 966 */ 967 rctl_qty_t 968 rctl_model_value(rctl_dict_entry_t *rde, struct proc *p, rctl_qty_t value) 969 { 970 rctl_qty_t max = rctl_model_maximum(rde, p); 971 972 return (value < max ? 
value : max); 973 } 974 975 static void 976 rctl_set_insert(rctl_set_t *set, rctl_hndl_t hndl, rctl_t *rctl) 977 { 978 uint_t index = hndl % rctl_set_size; 979 rctl_t *next_ctl, *prev_ctl; 980 981 ASSERT(MUTEX_HELD(&set->rcs_lock)); 982 983 rctl->rc_next = NULL; 984 985 if (set->rcs_ctls[index] == NULL) { 986 set->rcs_ctls[index] = rctl; 987 return; 988 } 989 990 if (hndl < set->rcs_ctls[index]->rc_id) { 991 rctl->rc_next = set->rcs_ctls[index]; 992 set->rcs_ctls[index] = rctl; 993 994 return; 995 } 996 997 for (next_ctl = set->rcs_ctls[index]->rc_next, 998 prev_ctl = set->rcs_ctls[index]; 999 next_ctl != NULL; 1000 prev_ctl = next_ctl, 1001 next_ctl = next_ctl->rc_next) { 1002 if (next_ctl->rc_id > hndl) { 1003 rctl->rc_next = next_ctl; 1004 prev_ctl->rc_next = rctl; 1005 1006 return; 1007 } 1008 } 1009 1010 rctl->rc_next = next_ctl; 1011 prev_ctl->rc_next = rctl; 1012 } 1013 1014 /* 1015 * rctl_set_t *rctl_set_create() 1016 * 1017 * Overview 1018 * Create an empty resource control set, suitable for attaching to a 1019 * controlled entity. 1020 * 1021 * Return values 1022 * A pointer to the newly created set. 1023 * 1024 * Caller's context 1025 * Safe for KM_SLEEP allocations. 1026 */ 1027 rctl_set_t * 1028 rctl_set_create() 1029 { 1030 rctl_set_t *rset = kmem_zalloc(sizeof (rctl_set_t), KM_SLEEP); 1031 1032 mutex_init(&rset->rcs_lock, NULL, MUTEX_DEFAULT, NULL); 1033 rset->rcs_ctls = kmem_zalloc(rctl_set_size * sizeof (rctl_t *), 1034 KM_SLEEP); 1035 rset->rcs_entity = -1; 1036 1037 return (rset); 1038 } 1039 1040 /* 1041 * rctl_gp_alloc_t *rctl_set_init_prealloc(rctl_entity_t) 1042 * 1043 * Overview 1044 * rctl_set_init_prealloc() examines the globally defined resource controls 1045 * and their default values and returns a resource control allocation group 1046 * populated with sufficient controls and values to form a representative 1047 * resource control set for the specified entity. 
1048 * 1049 * Return values 1050 * A pointer to the newly created allocation group. 1051 * 1052 * Caller's context 1053 * Caller must be in a context suitable for KM_SLEEP allocations. 1054 */ 1055 rctl_alloc_gp_t * 1056 rctl_set_init_prealloc(rctl_entity_t entity) 1057 { 1058 rctl_dict_entry_t *rde; 1059 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1060 1061 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1062 1063 if (rctl_lists[entity] == NULL) 1064 return (ragp); 1065 1066 mutex_enter(&rctl_lists_lock); 1067 1068 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1069 ragp->rcag_nctls++; 1070 ragp->rcag_nvals += rctl_val_list_count(rde->rcd_default_value); 1071 } 1072 1073 mutex_exit(&rctl_lists_lock); 1074 1075 rctl_gp_alloc(ragp); 1076 1077 return (ragp); 1078 } 1079 1080 /* 1081 * rctl_set_t *rctl_set_init(rctl_entity_t) 1082 * 1083 * Overview 1084 * rctl_set_create() creates a resource control set, initialized with the 1085 * system infinite values on all registered controls, for attachment to a 1086 * system entity requiring resource controls, such as a process or a task. 1087 * 1088 * Return values 1089 * A pointer to the newly filled set. 1090 * 1091 * Caller's context 1092 * Caller must be holding p_lock on entry so that RCTLOP_SET() functions 1093 * may modify task and project members based on the proc structure 1094 * they are passed. 
1095 */ 1096 rctl_set_t * 1097 rctl_set_init(rctl_entity_t entity, struct proc *p, rctl_entity_p_t *e, 1098 rctl_set_t *rset, rctl_alloc_gp_t *ragp) 1099 { 1100 rctl_dict_entry_t *rde; 1101 1102 ASSERT(MUTEX_HELD(&p->p_lock)); 1103 ASSERT(e); 1104 rset->rcs_entity = entity; 1105 1106 if (rctl_lists[entity] == NULL) 1107 return (rset); 1108 1109 mutex_enter(&rctl_lists_lock); 1110 mutex_enter(&rset->rcs_lock); 1111 1112 for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) { 1113 rctl_t *rctl = rctl_gp_detach_ctl(ragp); 1114 1115 rctl->rc_dict_entry = rde; 1116 rctl->rc_id = rde->rcd_id; 1117 rctl->rc_projdb = NULL; 1118 1119 rctl->rc_values = rctl_val_list_dup(rde->rcd_default_value, 1120 ragp, NULL, p); 1121 rctl->rc_cursor = rctl->rc_values; 1122 1123 ASSERT(rctl->rc_cursor != NULL); 1124 1125 rctl_set_insert(rset, rde->rcd_id, rctl); 1126 1127 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1128 rctl->rc_cursor->rcv_value)); 1129 } 1130 1131 mutex_exit(&rset->rcs_lock); 1132 mutex_exit(&rctl_lists_lock); 1133 1134 return (rset); 1135 } 1136 1137 static rctl_t * 1138 rctl_dup(rctl_t *rctl, rctl_alloc_gp_t *ragp, struct proc *oldp, 1139 struct proc *newp) 1140 { 1141 rctl_t *dup = rctl_gp_detach_ctl(ragp); 1142 rctl_val_t *dval; 1143 1144 dup->rc_id = rctl->rc_id; 1145 dup->rc_dict_entry = rctl->rc_dict_entry; 1146 dup->rc_next = NULL; 1147 dup->rc_cursor = NULL; 1148 dup->rc_values = rctl_val_list_dup(rctl->rc_values, ragp, oldp, newp); 1149 1150 for (dval = dup->rc_values; 1151 dval != NULL; dval = dval->rcv_next) { 1152 if (rctl_val_cmp(rctl->rc_cursor, dval, 0) >= 0) { 1153 dup->rc_cursor = dval; 1154 break; 1155 } 1156 } 1157 1158 if (dup->rc_cursor == NULL) 1159 dup->rc_cursor = dup->rc_values; 1160 1161 return (dup); 1162 } 1163 1164 static void 1165 rctl_set_fill_alloc_gp(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1166 { 1167 uint_t i; 1168 1169 bzero(ragp, sizeof (rctl_alloc_gp_t)); 1170 1171 for (i = 0; i < rctl_set_size; i++) { 
1172 rctl_t *r = set->rcs_ctls[i]; 1173 1174 while (r != NULL) { 1175 ragp->rcag_nctls++; 1176 1177 ragp->rcag_nvals += rctl_val_list_count(r->rc_values); 1178 1179 r = r->rc_next; 1180 } 1181 } 1182 } 1183 1184 /* 1185 * rctl_alloc_gp_t *rctl_set_dup_prealloc(rctl_set_t *) 1186 * 1187 * Overview 1188 * Given a resource control set, allocate a sufficiently large allocation 1189 * group to contain a duplicate of the set. 1190 * 1191 * Return value 1192 * A pointer to the newly created allocation group. 1193 * 1194 * Caller's context 1195 * Safe for KM_SLEEP allocations. 1196 */ 1197 rctl_alloc_gp_t * 1198 rctl_set_dup_prealloc(rctl_set_t *set) 1199 { 1200 rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 1201 1202 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 1203 1204 mutex_enter(&set->rcs_lock); 1205 rctl_set_fill_alloc_gp(set, ragp); 1206 mutex_exit(&set->rcs_lock); 1207 1208 rctl_gp_alloc(ragp); 1209 1210 return (ragp); 1211 } 1212 1213 /* 1214 * int rctl_set_dup_ready(rctl_set_t *, rctl_alloc_gp_t *) 1215 * 1216 * Overview 1217 * Verify that the allocation group provided is large enough to allow a 1218 * duplicate of the given resource control set to be constructed from its 1219 * contents. 1220 * 1221 * Return values 1222 * 1 if the allocation group is sufficiently large, 0 otherwise. 1223 * 1224 * Caller's context 1225 * rcs_lock must be held prior to entry. 1226 */ 1227 int 1228 rctl_set_dup_ready(rctl_set_t *set, rctl_alloc_gp_t *ragp) 1229 { 1230 rctl_alloc_gp_t curr_gp; 1231 1232 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1233 1234 rctl_set_fill_alloc_gp(set, &curr_gp); 1235 1236 if (curr_gp.rcag_nctls <= ragp->rcag_nctls && 1237 curr_gp.rcag_nvals <= ragp->rcag_nvals) 1238 return (1); 1239 1240 return (0); 1241 } 1242 1243 /* 1244 * rctl_set_t *rctl_set_dup(rctl_set_t *, struct proc *, struct proc *, 1245 * rctl_set_t *, rctl_alloc_gp_t *, int) 1246 * 1247 * Overview 1248 * Make a duplicate of the resource control set. 
The proc pointers are those 1249 * of the owning process and of the process associated with the entity 1250 * receiving the duplicate. 1251 * 1252 * Duplication is a 3 stage process. Stage 1 is memory allocation for 1253 * the duplicate set, which is taken care of by rctl_set_dup_prealloc(). 1254 * Stage 2 consists of copying all rctls and values from the old set into 1255 * the new. Stage 3 completes the duplication by performing the appropriate 1256 * callbacks for each rctl in the new set. 1257 * 1258 * Stages 2 and 3 are handled by calling rctl_set_dup with the RCD_DUP and 1259 * RCD_CALLBACK functions, respectively. The RCD_CALLBACK flag may only 1260 * be supplied if the newp proc structure reflects the new task and 1261 * project linkage. 1262 * 1263 * Return value 1264 * A pointer to the duplicate set. 1265 * 1266 * Caller's context 1267 * The rcs_lock of the set to be duplicated must be held prior to entry. 1268 */ 1269 rctl_set_t * 1270 rctl_set_dup(rctl_set_t *set, struct proc *oldp, struct proc *newp, 1271 rctl_entity_p_t *e, rctl_set_t *dup, rctl_alloc_gp_t *ragp, int flag) 1272 { 1273 uint_t i; 1274 rctl_set_t *iter; 1275 1276 ASSERT((flag & RCD_DUP) || (flag & RCD_CALLBACK)); 1277 ASSERT(e); 1278 /* 1279 * When copying the old set, iterate over that. Otherwise, when 1280 * only callbacks have been requested, iterate over the dup set. 
1281 */ 1282 if (flag & RCD_DUP) { 1283 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1284 iter = set; 1285 dup->rcs_entity = set->rcs_entity; 1286 } else { 1287 iter = dup; 1288 } 1289 1290 mutex_enter(&dup->rcs_lock); 1291 1292 for (i = 0; i < rctl_set_size; i++) { 1293 rctl_t *r = iter->rcs_ctls[i]; 1294 rctl_t *d; 1295 1296 while (r != NULL) { 1297 if (flag & RCD_DUP) { 1298 d = rctl_dup(r, ragp, oldp, newp); 1299 rctl_set_insert(dup, r->rc_id, d); 1300 } else { 1301 d = r; 1302 } 1303 1304 if (flag & RCD_CALLBACK) 1305 RCTLOP_SET(d, newp, e, 1306 rctl_model_value(d->rc_dict_entry, newp, 1307 d->rc_cursor->rcv_value)); 1308 1309 r = r->rc_next; 1310 } 1311 } 1312 1313 mutex_exit(&dup->rcs_lock); 1314 1315 return (dup); 1316 } 1317 1318 /* 1319 * void rctl_set_free(rctl_set_t *) 1320 * 1321 * Overview 1322 * Delete resource control set and all attached values. 1323 * 1324 * Return values 1325 * No value returned. 1326 * 1327 * Caller's context 1328 * No restrictions on context. 1329 */ 1330 void 1331 rctl_set_free(rctl_set_t *set) 1332 { 1333 uint_t i; 1334 1335 mutex_enter(&set->rcs_lock); 1336 for (i = 0; i < rctl_set_size; i++) { 1337 rctl_t *r = set->rcs_ctls[i]; 1338 1339 while (r != NULL) { 1340 rctl_val_t *v = r->rc_values; 1341 rctl_t *n = r->rc_next; 1342 1343 kmem_cache_free(rctl_cache, r); 1344 1345 rctl_val_list_free(v); 1346 1347 r = n; 1348 } 1349 } 1350 mutex_exit(&set->rcs_lock); 1351 1352 kmem_free(set->rcs_ctls, sizeof (rctl_t *) * rctl_set_size); 1353 kmem_free(set, sizeof (rctl_set_t)); 1354 } 1355 1356 /* 1357 * void rctl_set_reset(rctl_set_t *) 1358 * 1359 * Overview 1360 * Resets all rctls within the set such that the lowest value becomes active. 1361 * 1362 * Return values 1363 * No value returned. 1364 * 1365 * Caller's context 1366 * No restrictions on context. 
1367 */ 1368 void 1369 rctl_set_reset(rctl_set_t *set, struct proc *p, rctl_entity_p_t *e) 1370 { 1371 uint_t i; 1372 1373 ASSERT(e); 1374 1375 mutex_enter(&set->rcs_lock); 1376 for (i = 0; i < rctl_set_size; i++) { 1377 rctl_t *r = set->rcs_ctls[i]; 1378 1379 while (r != NULL) { 1380 r->rc_cursor = r->rc_values; 1381 rctl_val_list_reset(r->rc_cursor); 1382 RCTLOP_SET(r, p, e, rctl_model_value(r->rc_dict_entry, 1383 p, r->rc_cursor->rcv_value)); 1384 1385 ASSERT(r->rc_cursor != NULL); 1386 1387 r = r->rc_next; 1388 } 1389 } 1390 1391 mutex_exit(&set->rcs_lock); 1392 } 1393 1394 /* 1395 * void rctl_set_tearoff(rctl_set *, struct proc *) 1396 * 1397 * Overview 1398 * Tear off any resource control values on this set with an action recipient 1399 * equal to the specified process (as they are becoming invalid with the 1400 * process's departure from this set as an observer). 1401 * 1402 * Return values 1403 * No value returned. 1404 * 1405 * Caller's context 1406 * No restrictions on context 1407 */ 1408 void 1409 rctl_set_tearoff(rctl_set_t *set, struct proc *p) 1410 { 1411 uint_t i; 1412 1413 mutex_enter(&set->rcs_lock); 1414 for (i = 0; i < rctl_set_size; i++) { 1415 rctl_t *r = set->rcs_ctls[i]; 1416 1417 while (r != NULL) { 1418 rctl_val_t *rval; 1419 1420 tearoff_rewalk_list: 1421 rval = r->rc_values; 1422 1423 while (rval != NULL) { 1424 if (rval->rcv_privilege == RCPRIV_BASIC && 1425 rval->rcv_action_recipient == p) { 1426 if (r->rc_cursor == rval) 1427 r->rc_cursor = rval->rcv_next; 1428 1429 (void) rctl_val_list_delete( 1430 &r->rc_values, rval); 1431 1432 goto tearoff_rewalk_list; 1433 } 1434 1435 rval = rval->rcv_next; 1436 } 1437 1438 ASSERT(r->rc_cursor != NULL); 1439 1440 r = r->rc_next; 1441 } 1442 } 1443 1444 mutex_exit(&set->rcs_lock); 1445 } 1446 1447 int 1448 rctl_set_find(rctl_set_t *set, rctl_hndl_t hndl, rctl_t **rctl) 1449 { 1450 uint_t index = hndl % rctl_set_size; 1451 rctl_t *curr_ctl; 1452 1453 ASSERT(MUTEX_HELD(&set->rcs_lock)); 1454 1455 
for (curr_ctl = set->rcs_ctls[index]; curr_ctl != NULL; 1456 curr_ctl = curr_ctl->rc_next) { 1457 if (curr_ctl->rc_id == hndl) { 1458 *rctl = curr_ctl; 1459 1460 return (0); 1461 } 1462 } 1463 1464 return (-1); 1465 } 1466 1467 /* 1468 * rlim64_t rctl_enforced_value(rctl_hndl_t, rctl_set_t *, struct proc *) 1469 * 1470 * Overview 1471 * Given a process, get the next enforced value on the rctl of the specified 1472 * handle. 1473 * 1474 * Return value 1475 * The enforced value. 1476 * 1477 * Caller's context 1478 * For controls on process collectives, p->p_lock must be held across the 1479 * operation. 1480 */ 1481 /*ARGSUSED*/ 1482 rctl_qty_t 1483 rctl_enforced_value(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p) 1484 { 1485 rctl_t *rctl; 1486 rlim64_t ret; 1487 1488 mutex_enter(&rset->rcs_lock); 1489 1490 if (rctl_set_find(rset, hndl, &rctl) == -1) 1491 panic("unknown resource control handle %d requested", hndl); 1492 else 1493 ret = rctl_model_value(rctl->rc_dict_entry, p, 1494 rctl->rc_cursor->rcv_value); 1495 1496 mutex_exit(&rset->rcs_lock); 1497 1498 return (ret); 1499 } 1500 1501 /* 1502 * int rctl_global_get(const char *, rctl_dict_entry_t *) 1503 * 1504 * Overview 1505 * Copy a sanitized version of the global rctl for a given resource control 1506 * name. (By sanitization, we mean that the unsafe data pointers have been 1507 * zeroed.) 1508 * 1509 * Return value 1510 * -1 if name not defined, 0 otherwise. 1511 * 1512 * Caller's context 1513 * No restrictions on context. rctl_dict_lock must not be held. 
1514 */ 1515 int 1516 rctl_global_get(const char *name, rctl_dict_entry_t *drde) 1517 { 1518 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1519 1520 if (rde == NULL) 1521 return (-1); 1522 1523 bcopy(rde, drde, sizeof (rctl_dict_entry_t)); 1524 1525 drde->rcd_next = NULL; 1526 drde->rcd_ops = NULL; 1527 1528 return (0); 1529 } 1530 1531 /* 1532 * int rctl_global_set(const char *, rctl_dict_entry_t *) 1533 * 1534 * Overview 1535 * Transfer the settable fields of the named rctl to the global rctl matching 1536 * the given resource control name. 1537 * 1538 * Return value 1539 * -1 if name not defined, 0 otherwise. 1540 * 1541 * Caller's context 1542 * No restrictions on context. rctl_dict_lock must not be held. 1543 */ 1544 int 1545 rctl_global_set(const char *name, rctl_dict_entry_t *drde) 1546 { 1547 rctl_dict_entry_t *rde = rctl_dict_lookup(name); 1548 1549 if (rde == NULL) 1550 return (-1); 1551 1552 rde->rcd_flagaction = drde->rcd_flagaction; 1553 rde->rcd_syslog_level = drde->rcd_syslog_level; 1554 rde->rcd_strlog_flags = drde->rcd_strlog_flags; 1555 1556 return (0); 1557 } 1558 1559 static int 1560 rctl_local_op(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1561 int (*cbop)(rctl_hndl_t, struct proc *p, rctl_entity_p_t *e, rctl_t *, 1562 rctl_val_t *, rctl_val_t *), struct proc *p) 1563 { 1564 rctl_t *rctl; 1565 rctl_set_t *rset; 1566 rctl_entity_p_t e; 1567 int ret = 0; 1568 rctl_dict_entry_t *rde = rctl_dict_lookup_hndl(hndl); 1569 1570 ASSERT(MUTEX_HELD(&p->p_lock)); 1571 1572 rset = rctl_entity_obtain_rset(rde, p); 1573 1574 if (rset == NULL) { 1575 return (-1); 1576 } 1577 rctl_entity_obtain_entity_p(rset->rcs_entity, p, &e); 1578 1579 mutex_enter(&rset->rcs_lock); 1580 1581 /* using rctl's hndl, get rctl from local set */ 1582 if (rctl_set_find(rset, hndl, &rctl) == -1) { 1583 mutex_exit(&rset->rcs_lock); 1584 return (-1); 1585 } 1586 1587 ret = cbop(hndl, p, &e, rctl, oval, nval); 1588 1589 mutex_exit(&rset->rcs_lock); 1590 return (ret); 1591 
} 1592 1593 /*ARGSUSED*/ 1594 static int 1595 rctl_local_get_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1596 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1597 { 1598 if (oval == NULL) { 1599 /* 1600 * RCTL_FIRST 1601 */ 1602 bcopy(rctl->rc_values, nval, sizeof (rctl_val_t)); 1603 } else { 1604 /* 1605 * RCTL_NEXT 1606 */ 1607 rctl_val_t *tval = rctl_val_list_find(&rctl->rc_values, oval); 1608 1609 if (tval == NULL) 1610 return (ESRCH); 1611 else if (tval->rcv_next == NULL) 1612 return (ENOENT); 1613 else 1614 bcopy(tval->rcv_next, nval, sizeof (rctl_val_t)); 1615 } 1616 1617 return (0); 1618 } 1619 1620 /* 1621 * int rctl_local_get(rctl_hndl_t, rctl_val_t *) 1622 * 1623 * Overview 1624 * Get the rctl value for the given flags. 1625 * 1626 * Return values 1627 * 0 for successful get, errno otherwise. 1628 */ 1629 int 1630 rctl_local_get(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 1631 struct proc *p) 1632 { 1633 return (rctl_local_op(hndl, oval, nval, rctl_local_get_cb, p)); 1634 } 1635 1636 /*ARGSUSED*/ 1637 static int 1638 rctl_local_delete_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1639 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1640 { 1641 if ((oval = rctl_val_list_find(&rctl->rc_values, nval)) == NULL) 1642 return (ESRCH); 1643 1644 if (rctl->rc_cursor == oval) { 1645 rctl->rc_cursor = oval->rcv_next; 1646 rctl_val_list_reset(rctl->rc_cursor); 1647 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1648 rctl->rc_cursor->rcv_value)); 1649 1650 ASSERT(rctl->rc_cursor != NULL); 1651 } 1652 1653 (void) rctl_val_list_delete(&rctl->rc_values, oval); 1654 1655 return (0); 1656 } 1657 1658 /* 1659 * int rctl_local_delete(rctl_hndl_t, rctl_val_t *) 1660 * 1661 * Overview 1662 * Delete the rctl value for the given flags. 1663 * 1664 * Return values 1665 * 0 for successful delete, errno otherwise. 
1666 */ 1667 int 1668 rctl_local_delete(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1669 { 1670 return (rctl_local_op(hndl, NULL, val, rctl_local_delete_cb, p)); 1671 } 1672 1673 /* 1674 * rctl_local_insert_cb() 1675 * 1676 * Overview 1677 * Insert a new value into the rctl's val list. If an error occurs, 1678 * the val list must be left in the same state as when the function 1679 * was entered. 1680 * 1681 * Return Values 1682 * 0 for successful insert, EINVAL if the value is duplicated in the 1683 * existing list. 1684 */ 1685 /*ARGSUSED*/ 1686 static int 1687 rctl_local_insert_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1688 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1689 { 1690 /* 1691 * Before inserting, confirm there are no duplicates of this value 1692 * and flag level. If there is a duplicate, flag an error and do 1693 * nothing. 1694 */ 1695 if (rctl_val_list_insert(&rctl->rc_values, nval) != 0) 1696 return (EINVAL); 1697 1698 if (rctl_val_cmp(nval, rctl->rc_cursor, 0) < 0) { 1699 rctl->rc_cursor = nval; 1700 rctl_val_list_reset(rctl->rc_cursor); 1701 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1702 rctl->rc_cursor->rcv_value)); 1703 1704 ASSERT(rctl->rc_cursor != NULL); 1705 } 1706 1707 return (0); 1708 } 1709 1710 /* 1711 * int rctl_local_insert(rctl_hndl_t, rctl_val_t *) 1712 * 1713 * Overview 1714 * Insert the rctl value into the appropriate rctl set for the calling 1715 * process, given the handle. 1716 */ 1717 int 1718 rctl_local_insert(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p) 1719 { 1720 return (rctl_local_op(hndl, NULL, val, rctl_local_insert_cb, p)); 1721 } 1722 1723 /* 1724 * rctl_local_insert_all_cb() 1725 * 1726 * Overview 1727 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1728 * 1729 * Inserts new values from the project database (new_values). alloc_values 1730 * should be a linked list of pre-allocated rctl_val_t, which are used to 1731 * populate (rc_projdb). 
1732 * 1733 * Should the *new_values linked list match the contents of the rctl's 1734 * rp_projdb then we do nothing. 1735 * 1736 * Return Values 1737 * 0 is always returned. 1738 */ 1739 /*ARGSUSED*/ 1740 static int 1741 rctl_local_insert_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1742 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1743 { 1744 rctl_val_t *val; 1745 rctl_val_t *tmp_val; 1746 rctl_val_t *next; 1747 int modified = 0; 1748 1749 /* 1750 * If this the first time we've set this project rctl, then we delete 1751 * all the privilege values. These privilege values have been set by 1752 * rctl_add_default_limit(). 1753 * 1754 * We save some cycles here by not calling rctl_val_list_delete(). 1755 */ 1756 if (rctl->rc_projdb == NULL) { 1757 val = rctl->rc_values; 1758 1759 while (val != NULL) { 1760 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1761 if (val->rcv_prev != NULL) 1762 val->rcv_prev->rcv_next = val->rcv_next; 1763 else 1764 rctl->rc_values = val->rcv_next; 1765 1766 if (val->rcv_next != NULL) 1767 val->rcv_next->rcv_prev = val->rcv_prev; 1768 1769 tmp_val = val; 1770 val = val->rcv_next; 1771 kmem_cache_free(rctl_val_cache, tmp_val); 1772 } else { 1773 val = val->rcv_next; 1774 } 1775 } 1776 modified = 1; 1777 } 1778 1779 /* 1780 * Delete active values previously set through the project database. 1781 */ 1782 val = rctl->rc_projdb; 1783 1784 while (val != NULL) { 1785 1786 /* Is the old value found in the new values? */ 1787 if (rctl_val_list_find(&new_values, val) == NULL) { 1788 1789 /* 1790 * Delete from the active values if it originated from 1791 * the project database. 
1792 */ 1793 if (((tmp_val = rctl_val_list_find(&rctl->rc_values, 1794 val)) != NULL) && 1795 (tmp_val->rcv_flagaction & RCTL_LOCAL_PROJDB)) { 1796 (void) rctl_val_list_delete(&rctl->rc_values, 1797 tmp_val); 1798 } 1799 1800 tmp_val = val->rcv_next; 1801 (void) rctl_val_list_delete(&rctl->rc_projdb, val); 1802 val = tmp_val; 1803 modified = 1; 1804 1805 } else 1806 val = val->rcv_next; 1807 } 1808 1809 /* 1810 * Insert new values from the project database. 1811 */ 1812 while (new_values != NULL) { 1813 next = new_values->rcv_next; 1814 1815 /* 1816 * Insert this new value into the rc_projdb, and duplicate this 1817 * entry to the active list. 1818 */ 1819 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1820 1821 tmp_val = alloc_values->rcv_next; 1822 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1823 alloc_values->rcv_next = tmp_val; 1824 1825 if (rctl_val_list_insert(&rctl->rc_values, 1826 alloc_values) == 0) { 1827 /* inserted move alloc_values on */ 1828 alloc_values = tmp_val; 1829 modified = 1; 1830 } 1831 } else { 1832 /* 1833 * Unlike setrctl() we don't want to return an error on 1834 * a duplicate entry; we are concerned solely with 1835 * ensuring that all the values specified are set. 
1836 */ 1837 kmem_cache_free(rctl_val_cache, new_values); 1838 } 1839 new_values = next; 1840 } 1841 1842 /* Teardown any unused rctl_val_t */ 1843 while (alloc_values != NULL) { 1844 tmp_val = alloc_values; 1845 alloc_values = alloc_values->rcv_next; 1846 kmem_cache_free(rctl_val_cache, tmp_val); 1847 } 1848 1849 /* Reset the cursor if rctl values have been modified */ 1850 if (modified) { 1851 rctl->rc_cursor = rctl->rc_values; 1852 rctl_val_list_reset(rctl->rc_cursor); 1853 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1854 rctl->rc_cursor->rcv_value)); 1855 } 1856 1857 return (0); 1858 } 1859 1860 int 1861 rctl_local_insert_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1862 rctl_val_t *alloc_values, struct proc *p) 1863 { 1864 return (rctl_local_op(hndl, new_values, alloc_values, 1865 rctl_local_insert_all_cb, p)); 1866 } 1867 1868 /* 1869 * rctl_local_replace_all_cb() 1870 * 1871 * Overview 1872 * Called for RCENTITY_PROJECT rctls only, via rctlsys_projset(). 1873 * 1874 * Clears the active rctl values (rc_values), and stored values from the 1875 * previous insertions from the project database (rc_projdb). 1876 * 1877 * Inserts new values from the project database (new_values). alloc_values 1878 * should be a linked list of pre-allocated rctl_val_t, which are used to 1879 * populate (rc_projdb). 1880 * 1881 * Return Values 1882 * 0 is always returned. 
1883 */ 1884 /*ARGSUSED*/ 1885 static int 1886 rctl_local_replace_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1887 rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values) 1888 { 1889 rctl_val_t *val; 1890 rctl_val_t *next; 1891 rctl_val_t *tmp_val; 1892 1893 /* Delete all the privilege vaules */ 1894 val = rctl->rc_values; 1895 1896 while (val != NULL) { 1897 if (val->rcv_privilege == RCPRIV_PRIVILEGED) { 1898 if (val->rcv_prev != NULL) 1899 val->rcv_prev->rcv_next = val->rcv_next; 1900 else 1901 rctl->rc_values = val->rcv_next; 1902 1903 if (val->rcv_next != NULL) 1904 val->rcv_next->rcv_prev = val->rcv_prev; 1905 1906 tmp_val = val; 1907 val = val->rcv_next; 1908 kmem_cache_free(rctl_val_cache, tmp_val); 1909 } else { 1910 val = val->rcv_next; 1911 } 1912 } 1913 1914 /* Delete the contents of rc_projdb */ 1915 val = rctl->rc_projdb; 1916 while (val != NULL) { 1917 1918 tmp_val = val; 1919 val = val->rcv_next; 1920 kmem_cache_free(rctl_val_cache, tmp_val); 1921 } 1922 rctl->rc_projdb = NULL; 1923 1924 /* 1925 * Insert new values from the project database. 1926 */ 1927 while (new_values != NULL) { 1928 next = new_values->rcv_next; 1929 1930 if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) { 1931 tmp_val = alloc_values->rcv_next; 1932 bcopy(new_values, alloc_values, sizeof (rctl_val_t)); 1933 alloc_values->rcv_next = tmp_val; 1934 1935 if (rctl_val_list_insert(&rctl->rc_values, 1936 alloc_values) == 0) { 1937 /* inserted, so move alloc_values on */ 1938 alloc_values = tmp_val; 1939 } 1940 } else { 1941 /* 1942 * Unlike setrctl() we don't want to return an error on 1943 * a duplicate entry; we are concerned solely with 1944 * ensuring that all the values specified are set. 
1945 */ 1946 kmem_cache_free(rctl_val_cache, new_values); 1947 } 1948 1949 new_values = next; 1950 } 1951 1952 /* Teardown any unused rctl_val_t */ 1953 while (alloc_values != NULL) { 1954 tmp_val = alloc_values; 1955 alloc_values = alloc_values->rcv_next; 1956 kmem_cache_free(rctl_val_cache, tmp_val); 1957 } 1958 1959 /* Always reset the cursor */ 1960 rctl->rc_cursor = rctl->rc_values; 1961 rctl_val_list_reset(rctl->rc_cursor); 1962 RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p, 1963 rctl->rc_cursor->rcv_value)); 1964 1965 return (0); 1966 } 1967 1968 int 1969 rctl_local_replace_all(rctl_hndl_t hndl, rctl_val_t *new_values, 1970 rctl_val_t *alloc_values, struct proc *p) 1971 { 1972 return (rctl_local_op(hndl, new_values, alloc_values, 1973 rctl_local_replace_all_cb, p)); 1974 } 1975 1976 static int 1977 rctl_local_replace_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e, 1978 rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval) 1979 { 1980 int ret; 1981 rctl_val_t *tmp; 1982 1983 /* Verify that old will be delete-able */ 1984 tmp = rctl_val_list_find(&rctl->rc_values, oval); 1985 if (tmp == NULL) 1986 return (ESRCH); 1987 /* 1988 * Caller should verify that value being deleted is not the 1989 * system value. 1990 */ 1991 ASSERT(tmp->rcv_privilege != RCPRIV_SYSTEM); 1992 1993 /* 1994 * rctl_local_insert_cb() does the job of flagging an error 1995 * for any duplicate values. So, call rctl_local_insert_cb() 1996 * for the new value first, then do deletion of the old value. 1997 * Since this is a callback function to rctl_local_op, we can 1998 * count on rcs_lock being held at this point. This guarantees 1999 * that there is at no point a visible list which contains both 2000 * new and old values. 
2001 */ 2002 if (ret = rctl_local_insert_cb(hndl, p, e, rctl, NULL, nval)) 2003 return (ret); 2004 2005 ret = rctl_local_delete_cb(hndl, p, e, rctl, NULL, oval); 2006 ASSERT(ret == 0); 2007 return (0); 2008 } 2009 2010 /* 2011 * int rctl_local_replace(rctl_hndl_t, void *, int, uint64_t *) 2012 * 2013 * Overview 2014 * Replace the rctl value with a new one. 2015 * 2016 * Return values 2017 * 0 for successful replace, errno otherwise. 2018 */ 2019 int 2020 rctl_local_replace(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval, 2021 struct proc *p) 2022 { 2023 return (rctl_local_op(hndl, oval, nval, rctl_local_replace_cb, p)); 2024 } 2025 2026 /* 2027 * int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *) 2028 * 2029 * Overview 2030 * To support rlimit compatibility, we need a function which takes a 64-bit 2031 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2032 * This operation is only intended for legacy rlimits. 2033 */ 2034 int 2035 rctl_rlimit_get(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64) 2036 { 2037 rctl_t *rctl; 2038 rctl_val_t *rval; 2039 rctl_set_t *rset = p->p_rctls; 2040 int soft_limit_seen = 0; 2041 int test_for_deny = 1; 2042 2043 mutex_enter(&rset->rcs_lock); 2044 if (rctl_set_find(rset, rc, &rctl) == -1) { 2045 mutex_exit(&rset->rcs_lock); 2046 return (-1); 2047 } 2048 2049 rval = rctl->rc_values; 2050 2051 if (rctl->rc_dict_entry->rcd_flagaction & (RCTL_GLOBAL_DENY_NEVER | 2052 RCTL_GLOBAL_DENY_ALWAYS)) 2053 test_for_deny = 0; 2054 2055 /* 2056 * 1. Find the first control value with the RCTL_LOCAL_DENY bit set. 2057 */ 2058 while (rval != NULL && rval->rcv_privilege != RCPRIV_SYSTEM) { 2059 if (test_for_deny && 2060 (rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0) { 2061 rval = rval->rcv_next; 2062 continue; 2063 } 2064 2065 /* 2066 * 2. If this is an RCPRIV_BASIC value, then we've found the 2067 * effective soft limit and should set rlim_cur. 
We should then 2068 * continue looking for another control value with the DENY bit 2069 * set. 2070 */ 2071 if (rval->rcv_privilege == RCPRIV_BASIC) { 2072 if (soft_limit_seen) { 2073 rval = rval->rcv_next; 2074 continue; 2075 } 2076 2077 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2078 rval->rcv_value < rctl_model_maximum( 2079 rctl->rc_dict_entry, p)) 2080 rlp64->rlim_cur = rval->rcv_value; 2081 else 2082 rlp64->rlim_cur = RLIM64_INFINITY; 2083 soft_limit_seen = 1; 2084 2085 rval = rval->rcv_next; 2086 continue; 2087 } 2088 2089 /* 2090 * 3. This is an RCPRIV_PRIVILEGED value. If we haven't found 2091 * a soft limit candidate, then we've found the effective hard 2092 * and soft limits and should set both If we had found a soft 2093 * limit, then this is only the hard limit and we need only set 2094 * rlim_max. 2095 */ 2096 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2097 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, 2098 p)) 2099 rlp64->rlim_max = rval->rcv_value; 2100 else 2101 rlp64->rlim_max = RLIM64_INFINITY; 2102 if (!soft_limit_seen) 2103 rlp64->rlim_cur = rlp64->rlim_max; 2104 2105 mutex_exit(&rset->rcs_lock); 2106 return (0); 2107 } 2108 2109 if (rval == NULL) { 2110 /* 2111 * This control sequence is corrupt, as it is not terminated by 2112 * a system privileged control value. 2113 */ 2114 mutex_exit(&rset->rcs_lock); 2115 return (-1); 2116 } 2117 2118 /* 2119 * 4. If we run into a RCPRIV_SYSTEM value, then the hard limit (and 2120 * the soft, if we haven't a soft candidate) should be the value of the 2121 * system control value. 
2122 */ 2123 if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 && 2124 rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, p)) 2125 rlp64->rlim_max = rval->rcv_value; 2126 else 2127 rlp64->rlim_max = RLIM64_INFINITY; 2128 2129 if (!soft_limit_seen) 2130 rlp64->rlim_cur = rlp64->rlim_max; 2131 2132 mutex_exit(&rset->rcs_lock); 2133 return (0); 2134 } 2135 2136 /* 2137 * rctl_alloc_gp_t *rctl_rlimit_set_prealloc(uint_t) 2138 * 2139 * Overview 2140 * Before making a series of calls to rctl_rlimit_set(), we must have a 2141 * preallocated batch of resource control values, as rctl_rlimit_set() can 2142 * potentially consume two resource control values per call. 2143 * 2144 * Return values 2145 * A populated resource control allocation group with 2n resource control 2146 * values. 2147 * 2148 * Caller's context 2149 * Must be safe for KM_SLEEP allocations. 2150 */ 2151 rctl_alloc_gp_t * 2152 rctl_rlimit_set_prealloc(uint_t n) 2153 { 2154 rctl_alloc_gp_t *gp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP); 2155 2156 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 2157 2158 gp->rcag_nvals = 2 * n; 2159 2160 rctl_gp_alloc(gp); 2161 2162 return (gp); 2163 } 2164 2165 /* 2166 * int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, int, 2167 * int) 2168 * 2169 * Overview 2170 * To support rlimit compatibility, we need a function which takes a 64-bit 2171 * rlimit and encodes it as appropriate rcontrol values on the given rcontrol. 2172 * This operation is only intended for legacy rlimits. 2173 * 2174 * The implementation of rctl_rlimit_set() is a bit clever, as it tries to 2175 * minimize the number of values placed on the value sequence in various 2176 * cases. Furthermore, we don't allow multiple identical privilege-action 2177 * values on the same sequence. (That is, we don't want a sequence like 2178 * "while (1) { rlim.rlim_cur++; setrlimit(..., rlim); }" to exhaust kernel 2179 * memory.) 
So we want to delete any values with the same privilege value and 2180 * action. 2181 * 2182 * Return values 2183 * 0 for successful set, errno otherwise. Errno will be either EINVAL 2184 * or EPERM, in keeping with defined errnos for ulimit() and setrlimit() 2185 * system calls. 2186 */ 2187 /*ARGSUSED*/ 2188 int 2189 rctl_rlimit_set(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64, 2190 rctl_alloc_gp_t *ragp, int flagaction, int signal, const cred_t *cr) 2191 { 2192 rctl_t *rctl; 2193 rctl_val_t *rval, *rval_priv, *rval_basic; 2194 rctl_set_t *rset = p->p_rctls; 2195 rctl_qty_t max; 2196 rctl_entity_p_t e; 2197 struct rlimit64 cur_rl; 2198 2199 e.rcep_t = RCENTITY_PROCESS; 2200 e.rcep_p.proc = p; 2201 2202 if (rlp64->rlim_cur > rlp64->rlim_max) 2203 return (EINVAL); 2204 2205 if (rctl_rlimit_get(rc, p, &cur_rl) == -1) 2206 return (EINVAL); 2207 2208 /* 2209 * If we are not privileged, we can only lower the hard limit. 2210 */ 2211 if ((rlp64->rlim_max > cur_rl.rlim_max) && 2212 cur_rl.rlim_max != RLIM64_INFINITY && 2213 secpolicy_resource(cr) != 0) 2214 return (EPERM); 2215 2216 mutex_enter(&rset->rcs_lock); 2217 2218 if (rctl_set_find(rset, rc, &rctl) == -1) { 2219 mutex_exit(&rset->rcs_lock); 2220 return (EINVAL); 2221 } 2222 2223 rval_priv = rctl_gp_detach_val(ragp); 2224 2225 rval = rctl->rc_values; 2226 2227 while (rval != NULL) { 2228 rctl_val_t *next = rval->rcv_next; 2229 2230 if (rval->rcv_privilege == RCPRIV_SYSTEM) 2231 break; 2232 2233 if ((rval->rcv_privilege == RCPRIV_BASIC) || 2234 (rval->rcv_flagaction & ~RCTL_LOCAL_ACTION_MASK) == 2235 (flagaction & ~RCTL_LOCAL_ACTION_MASK)) { 2236 if (rctl->rc_cursor == rval) { 2237 rctl->rc_cursor = rval->rcv_next; 2238 rctl_val_list_reset(rctl->rc_cursor); 2239 RCTLOP_SET(rctl, p, &e, rctl_model_value( 2240 rctl->rc_dict_entry, p, 2241 rctl->rc_cursor->rcv_value)); 2242 } 2243 (void) rctl_val_list_delete(&rctl->rc_values, rval); 2244 } 2245 2246 rval = next; 2247 } 2248 2249 rval_priv->rcv_privilege = 
RCPRIV_PRIVILEGED; 2250 rval_priv->rcv_flagaction = flagaction; 2251 if (rlp64->rlim_max == RLIM64_INFINITY) { 2252 rval_priv->rcv_flagaction |= RCTL_LOCAL_MAXIMAL; 2253 max = rctl->rc_dict_entry->rcd_max_native; 2254 } else { 2255 max = rlp64->rlim_max; 2256 } 2257 rval_priv->rcv_value = max; 2258 rval_priv->rcv_action_signal = signal; 2259 rval_priv->rcv_action_recipient = NULL; 2260 rval_priv->rcv_action_recip_pid = -1; 2261 rval_priv->rcv_firing_time = 0; 2262 rval_priv->rcv_prev = rval_priv->rcv_next = NULL; 2263 2264 (void) rctl_val_list_insert(&rctl->rc_values, rval_priv); 2265 rctl->rc_cursor = rval_priv; 2266 rctl_val_list_reset(rctl->rc_cursor); 2267 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 2268 rctl->rc_cursor->rcv_value)); 2269 2270 if (rlp64->rlim_cur != RLIM64_INFINITY && rlp64->rlim_cur < max) { 2271 rval_basic = rctl_gp_detach_val(ragp); 2272 2273 rval_basic->rcv_privilege = RCPRIV_BASIC; 2274 rval_basic->rcv_value = rlp64->rlim_cur; 2275 rval_basic->rcv_flagaction = flagaction; 2276 rval_basic->rcv_action_signal = signal; 2277 rval_basic->rcv_action_recipient = p; 2278 rval_basic->rcv_action_recip_pid = p->p_pid; 2279 rval_basic->rcv_firing_time = 0; 2280 rval_basic->rcv_prev = rval_basic->rcv_next = NULL; 2281 2282 (void) rctl_val_list_insert(&rctl->rc_values, rval_basic); 2283 rctl->rc_cursor = rval_basic; 2284 rctl_val_list_reset(rctl->rc_cursor); 2285 RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p, 2286 rctl->rc_cursor->rcv_value)); 2287 } 2288 2289 ASSERT(rctl->rc_cursor != NULL); 2290 2291 mutex_exit(&rset->rcs_lock); 2292 return (0); 2293 } 2294 2295 2296 /* 2297 * rctl_hndl_t rctl_register(const char *, rctl_entity_t, int, rlim64_t, 2298 * rlim64_t, rctl_ops_t *) 2299 * 2300 * Overview 2301 * rctl_register() performs a look-up in the dictionary of rctls 2302 * active on the system; if a rctl of that name is absent, an entry is 2303 * made into the dictionary. 
The rctl is returned with its reference 2304 * count incremented by one. If the rctl name already exists, we panic. 2305 * (Were the resource control system to support dynamic loading and unloading, 2306 * which it is structured for, duplicate registration should lead to load 2307 * failure instead of panicking.) 2308 * 2309 * Each registered rctl has a requirement that a RCPRIV_SYSTEM limit be 2310 * defined. This limit contains the highest possible value for this quantity 2311 * on the system. Furthermore, the registered control must provide infinite 2312 * values for all applicable address space models supported by the operating 2313 * system. Attempts to set resource control values beyond the system limit 2314 * will fail. 2315 * 2316 * Return values 2317 * The rctl's ID. 2318 * 2319 * Caller's context 2320 * Caller must be in a context suitable for KM_SLEEP allocations. 2321 */ 2322 rctl_hndl_t 2323 rctl_register( 2324 const char *name, 2325 rctl_entity_t entity, 2326 int global_flags, 2327 rlim64_t max_native, 2328 rlim64_t max_ilp32, 2329 rctl_ops_t *ops) 2330 { 2331 rctl_t *rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP); 2332 rctl_val_t *rctl_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); 2333 rctl_dict_entry_t *rctl_de = kmem_zalloc(sizeof (rctl_dict_entry_t), 2334 KM_SLEEP); 2335 rctl_t *old_rctl; 2336 rctl_hndl_t rhndl; 2337 int localflags; 2338 2339 ASSERT(ops != NULL); 2340 2341 bzero(rctl, sizeof (rctl_t)); 2342 bzero(rctl_val, sizeof (rctl_val_t)); 2343 2344 if (global_flags & RCTL_GLOBAL_DENY_NEVER) 2345 localflags = RCTL_LOCAL_MAXIMAL; 2346 else 2347 localflags = RCTL_LOCAL_MAXIMAL | RCTL_LOCAL_DENY; 2348 2349 rctl_val->rcv_privilege = RCPRIV_SYSTEM; 2350 rctl_val->rcv_value = max_native; 2351 rctl_val->rcv_flagaction = localflags; 2352 rctl_val->rcv_action_signal = 0; 2353 rctl_val->rcv_action_recipient = NULL; 2354 rctl_val->rcv_action_recip_pid = -1; 2355 rctl_val->rcv_firing_time = 0; 2356 rctl_val->rcv_next = NULL; 2357 rctl_val->rcv_prev = 
NULL; 2358 2359 rctl_de->rcd_name = (char *)name; 2360 rctl_de->rcd_default_value = rctl_val; 2361 rctl_de->rcd_max_native = max_native; 2362 rctl_de->rcd_max_ilp32 = max_ilp32; 2363 rctl_de->rcd_entity = entity; 2364 rctl_de->rcd_ops = ops; 2365 rctl_de->rcd_flagaction = global_flags; 2366 2367 rctl->rc_dict_entry = rctl_de; 2368 rctl->rc_values = rctl_val; 2369 2370 /* 2371 * 1. Take global lock, validate nonexistence of name, get ID. 2372 */ 2373 mutex_enter(&rctl_dict_lock); 2374 2375 if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name, 2376 (mod_hash_val_t *)&rhndl) != MH_ERR_NOTFOUND) 2377 panic("duplicate registration of rctl %s", name); 2378 2379 rhndl = rctl_de->rcd_id = rctl->rc_id = 2380 (rctl_hndl_t)id_alloc(rctl_ids); 2381 2382 /* 2383 * 2. Insert name-entry pair in rctl_dict_by_name. 2384 */ 2385 if (mod_hash_insert(rctl_dict_by_name, (mod_hash_key_t)name, 2386 (mod_hash_val_t)rctl_de)) 2387 panic("unable to insert rctl dict entry for %s (%u)", name, 2388 (uint_t)rctl->rc_id); 2389 2390 /* 2391 * 3. Insert ID-rctl_t * pair in rctl_dict. 2392 */ 2393 if (mod_hash_find(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2394 (mod_hash_val_t *)&old_rctl) != MH_ERR_NOTFOUND) 2395 panic("duplicate rctl ID %u registered", rctl->rc_id); 2396 2397 if (mod_hash_insert(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id, 2398 (mod_hash_val_t)rctl)) 2399 panic("unable to insert rctl %s/%u (%p)", name, 2400 (uint_t)rctl->rc_id, (void *)rctl); 2401 2402 /* 2403 * 3a. Insert rctl_dict_entry_t * in appropriate entity list. 2404 */ 2405 2406 mutex_enter(&rctl_lists_lock); 2407 2408 switch (entity) { 2409 case RCENTITY_ZONE: 2410 case RCENTITY_PROJECT: 2411 case RCENTITY_TASK: 2412 case RCENTITY_PROCESS: 2413 rctl_de->rcd_next = rctl_lists[entity]; 2414 rctl_lists[entity] = rctl_de; 2415 break; 2416 default: 2417 panic("registering unknown rctl entity %d (%s)", entity, 2418 name); 2419 break; 2420 } 2421 2422 mutex_exit(&rctl_lists_lock); 2423 2424 /* 2425 * 4. 
Drop lock. 2426 */ 2427 mutex_exit(&rctl_dict_lock); 2428 2429 return (rhndl); 2430 } 2431 2432 /* 2433 * static int rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, 2434 * rctl_val_t *v) 2435 * 2436 * Overview 2437 * rctl_global_action() takes, in according with the flags on the rctl_dict 2438 * entry for the given control, the appropriate actions on the exceeded 2439 * control value. Additionally, rctl_global_action() updates the firing time 2440 * on the exceeded value. 2441 * 2442 * Return values 2443 * A bitmask reflecting the actions actually taken. 2444 * 2445 * Caller's context 2446 * No restrictions on context. 2447 */ 2448 /*ARGSUSED*/ 2449 static int 2450 rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v) 2451 { 2452 rctl_dict_entry_t *rde = r->rc_dict_entry; 2453 const char *pr, *en, *idstr; 2454 id_t id; 2455 enum { 2456 SUFFIX_NONE, /* id consumed directly */ 2457 SUFFIX_NUMERIC, /* id consumed in suffix */ 2458 SUFFIX_STRING /* idstr consumed in suffix */ 2459 } suffix = SUFFIX_NONE; 2460 int ret = 0; 2461 2462 v->rcv_firing_time = gethrtime(); 2463 2464 switch (v->rcv_privilege) { 2465 case RCPRIV_BASIC: 2466 pr = "basic"; 2467 break; 2468 case RCPRIV_PRIVILEGED: 2469 pr = "privileged"; 2470 break; 2471 case RCPRIV_SYSTEM: 2472 pr = "system"; 2473 break; 2474 default: 2475 pr = "unknown"; 2476 break; 2477 } 2478 2479 switch (rde->rcd_entity) { 2480 case RCENTITY_PROCESS: 2481 en = "process"; 2482 id = p->p_pid; 2483 suffix = SUFFIX_NONE; 2484 break; 2485 case RCENTITY_TASK: 2486 en = "task"; 2487 id = p->p_task->tk_tkid; 2488 suffix = SUFFIX_NUMERIC; 2489 break; 2490 case RCENTITY_PROJECT: 2491 en = "project"; 2492 id = p->p_task->tk_proj->kpj_id; 2493 suffix = SUFFIX_NUMERIC; 2494 break; 2495 case RCENTITY_ZONE: 2496 en = "zone"; 2497 idstr = p->p_zone->zone_name; 2498 suffix = SUFFIX_STRING; 2499 break; 2500 default: 2501 en = "unknown entity associated with process"; 2502 id = p->p_pid; 2503 suffix = 
SUFFIX_NONE; 2504 break; 2505 } 2506 2507 if (rde->rcd_flagaction & RCTL_GLOBAL_SYSLOG) { 2508 switch (suffix) { 2509 default: 2510 case SUFFIX_NONE: 2511 (void) strlog(0, 0, 0, 2512 rde->rcd_strlog_flags | log_global.lz_active, 2513 "%s rctl %s (value %llu) exceeded by %s %d.", 2514 pr, rde->rcd_name, v->rcv_value, en, id); 2515 break; 2516 case SUFFIX_NUMERIC: 2517 (void) strlog(0, 0, 0, 2518 rde->rcd_strlog_flags | log_global.lz_active, 2519 "%s rctl %s (value %llu) exceeded by process %d" 2520 " in %s %d.", 2521 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2522 en, id); 2523 break; 2524 case SUFFIX_STRING: 2525 (void) strlog(0, 0, 0, 2526 rde->rcd_strlog_flags | log_global.lz_active, 2527 "%s rctl %s (value %llu) exceeded by process %d" 2528 " in %s %s.", 2529 pr, rde->rcd_name, v->rcv_value, p->p_pid, 2530 en, idstr); 2531 break; 2532 } 2533 } 2534 2535 if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS) 2536 ret |= RCT_DENY; 2537 2538 return (ret); 2539 } 2540 2541 static int 2542 rctl_local_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v, 2543 uint_t safety) 2544 { 2545 int ret = 0; 2546 sigqueue_t *sqp = NULL; 2547 rctl_dict_entry_t *rde = r->rc_dict_entry; 2548 int unobservable = (rde->rcd_flagaction & RCTL_GLOBAL_UNOBSERVABLE); 2549 2550 proc_t *recipient = v->rcv_action_recipient; 2551 id_t recip_pid = v->rcv_action_recip_pid; 2552 int recip_signal = v->rcv_action_signal; 2553 uint_t flagaction = v->rcv_flagaction; 2554 2555 if (safety == RCA_UNSAFE_ALL) { 2556 if (flagaction & RCTL_LOCAL_DENY) { 2557 ret |= RCT_DENY; 2558 } 2559 return (ret); 2560 } 2561 2562 if (flagaction & RCTL_LOCAL_SIGNAL) { 2563 /* 2564 * We can build a siginfo only in the case that it is 2565 * safe for us to drop p_lock. (For asynchronous 2566 * checks this is currently not true.) 
2567 */ 2568 if (safety == RCA_SAFE) { 2569 mutex_exit(&rset->rcs_lock); 2570 mutex_exit(&p->p_lock); 2571 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); 2572 mutex_enter(&p->p_lock); 2573 mutex_enter(&rset->rcs_lock); 2574 2575 sqp->sq_info.si_signo = recip_signal; 2576 sqp->sq_info.si_code = SI_RCTL; 2577 sqp->sq_info.si_errno = 0; 2578 sqp->sq_info.si_entity = (int)rde->rcd_entity; 2579 } 2580 2581 if (recipient == NULL || recipient == p) { 2582 ret |= RCT_SIGNAL; 2583 2584 if (sqp == NULL) { 2585 sigtoproc(p, NULL, recip_signal); 2586 } else if (p == curproc) { 2587 /* 2588 * Then this is a synchronous test and we can 2589 * direct the signal at the violating thread. 2590 */ 2591 sigaddqa(curproc, curthread, sqp); 2592 } else { 2593 sigaddqa(p, NULL, sqp); 2594 } 2595 } else if (!unobservable) { 2596 proc_t *rp; 2597 2598 mutex_exit(&rset->rcs_lock); 2599 mutex_exit(&p->p_lock); 2600 2601 mutex_enter(&pidlock); 2602 if ((rp = prfind(recip_pid)) == recipient) { 2603 /* 2604 * Recipient process is still alive, but may not 2605 * be in this task or project any longer. In 2606 * this case, the recipient's resource control 2607 * set pertinent to this control will have 2608 * changed--and we will not deliver the signal, 2609 * as the recipient process is trying to tear 2610 * itself off of its former set. 2611 */ 2612 mutex_enter(&rp->p_lock); 2613 mutex_exit(&pidlock); 2614 2615 if (rctl_entity_obtain_rset(rde, rp) == rset) { 2616 ret |= RCT_SIGNAL; 2617 2618 if (sqp == NULL) 2619 sigtoproc(rp, NULL, 2620 recip_signal); 2621 else 2622 sigaddqa(rp, NULL, sqp); 2623 } else if (sqp) { 2624 kmem_free(sqp, sizeof (sigqueue_t)); 2625 } 2626 mutex_exit(&rp->p_lock); 2627 } else { 2628 mutex_exit(&pidlock); 2629 if (sqp) 2630 kmem_free(sqp, sizeof (sigqueue_t)); 2631 } 2632 2633 mutex_enter(&p->p_lock); 2634 /* 2635 * Since we dropped p_lock, we may no longer be in the 2636 * same task or project as we were at entry. 
It is thus 2637 * unsafe for us to reacquire the set lock at this 2638 * point; callers of rctl_local_action() must handle 2639 * this possibility. 2640 */ 2641 ret |= RCT_LK_ABANDONED; 2642 } else if (sqp) { 2643 kmem_free(sqp, sizeof (sigqueue_t)); 2644 } 2645 } 2646 2647 if ((flagaction & RCTL_LOCAL_DENY) && 2648 (recipient == NULL || recipient == p)) { 2649 ret |= RCT_DENY; 2650 } 2651 2652 return (ret); 2653 } 2654 2655 /* 2656 * int rctl_action(rctl_hndl_t, rctl_set_t *, struct proc *, uint_t) 2657 * 2658 * Overview 2659 * Take the action associated with the enforced value (as defined by 2660 * rctl_get_enforced_value()) being exceeded or encountered. Possibly perform 2661 * a restricted subset of the available actions, if circumstances dictate that 2662 * we cannot safely allocate memory (for a sigqueue_t) or guarantee process 2663 * persistence across the duration of the function (an asynchronous action). 2664 * 2665 * Return values 2666 * Actions taken, according to the rctl_test bitmask. 2667 * 2668 * Caller's context 2669 * Safe to acquire rcs_lock. 
2670 */ 2671 int 2672 rctl_action(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, uint_t safety) 2673 { 2674 return (rctl_action_entity(hndl, rset, p, NULL, safety)); 2675 } 2676 2677 int 2678 rctl_action_entity(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, 2679 rctl_entity_p_t *e, uint_t safety) 2680 { 2681 int ret = RCT_NONE; 2682 rctl_t *lrctl; 2683 rctl_entity_p_t e_tmp; 2684 2685 rctl_action_acquire: 2686 mutex_enter(&rset->rcs_lock); 2687 if (rctl_set_find(rset, hndl, &lrctl) == -1) { 2688 mutex_exit(&rset->rcs_lock); 2689 return (ret); 2690 } 2691 2692 if (e == NULL) { 2693 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2694 p, &e_tmp); 2695 e = &e_tmp; 2696 } 2697 2698 if ((ret & RCT_LK_ABANDONED) == 0) { 2699 ret |= rctl_global_action(lrctl, rset, p, lrctl->rc_cursor); 2700 2701 RCTLOP_ACTION(lrctl, p, e); 2702 2703 ret |= rctl_local_action(lrctl, rset, p, 2704 lrctl->rc_cursor, safety); 2705 2706 if (ret & RCT_LK_ABANDONED) 2707 goto rctl_action_acquire; 2708 } 2709 2710 ret &= ~RCT_LK_ABANDONED; 2711 2712 if (!(ret & RCT_DENY) && 2713 lrctl->rc_cursor->rcv_next != NULL) { 2714 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2715 2716 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2717 p, lrctl->rc_cursor->rcv_value)); 2718 2719 } 2720 mutex_exit(&rset->rcs_lock); 2721 2722 return (ret); 2723 } 2724 2725 /* 2726 * int rctl_test(rctl_hndl_t, rctl_set_t *, struct proc *, rctl_qty_t, uint_t) 2727 * 2728 * Overview 2729 * Increment the resource associated with the given handle, returning zero if 2730 * the incremented value does not exceed the threshold for the current limit 2731 * on the resource. 2732 * 2733 * Return values 2734 * Actions taken, according to the rctl_test bitmask. 2735 * 2736 * Caller's context 2737 * p_lock held by caller. 
2738 */ 2739 /*ARGSUSED*/ 2740 int 2741 rctl_test(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2742 rctl_qty_t incr, uint_t flags) 2743 { 2744 return (rctl_test_entity(rhndl, rset, p, NULL, incr, flags)); 2745 } 2746 2747 int 2748 rctl_test_entity(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p, 2749 rctl_entity_p_t *e, rctl_qty_t incr, uint_t flags) 2750 { 2751 rctl_t *lrctl; 2752 int ret = RCT_NONE; 2753 rctl_entity_p_t e_tmp; 2754 if (p == &p0) { 2755 /* 2756 * We don't enforce rctls on the kernel itself. 2757 */ 2758 return (ret); 2759 } 2760 2761 rctl_test_acquire: 2762 ASSERT(MUTEX_HELD(&p->p_lock)); 2763 2764 mutex_enter(&rset->rcs_lock); 2765 2766 /* 2767 * Dereference from rctl_set. We don't enforce newly loaded controls 2768 * that haven't been set on this entity (since the only valid value is 2769 * the infinite system value). 2770 */ 2771 if (rctl_set_find(rset, rhndl, &lrctl) == -1) { 2772 mutex_exit(&rset->rcs_lock); 2773 return (ret); 2774 } 2775 2776 /* 2777 * This control is currently unenforced: maximal value on control 2778 * supporting infinitely available resource. 2779 */ 2780 if ((lrctl->rc_dict_entry->rcd_flagaction & RCTL_GLOBAL_INFINITE) && 2781 (lrctl->rc_cursor->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) { 2782 2783 mutex_exit(&rset->rcs_lock); 2784 return (ret); 2785 } 2786 2787 /* 2788 * If we have been called by rctl_test, look up the entity pointer 2789 * from the proc pointer. 2790 */ 2791 if (e == NULL) { 2792 rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity, 2793 p, &e_tmp); 2794 e = &e_tmp; 2795 } 2796 2797 /* 2798 * Get enforced rctl value and current usage. Test the increment 2799 * with the current usage against the enforced value--take action as 2800 * necessary. 
2801 */ 2802 while (RCTLOP_TEST(lrctl, p, e, lrctl->rc_cursor, incr, flags)) { 2803 if ((ret & RCT_LK_ABANDONED) == 0) { 2804 ret |= rctl_global_action(lrctl, rset, p, 2805 lrctl->rc_cursor); 2806 2807 RCTLOP_ACTION(lrctl, p, e); 2808 2809 ret |= rctl_local_action(lrctl, rset, p, 2810 lrctl->rc_cursor, flags); 2811 2812 if (ret & RCT_LK_ABANDONED) 2813 goto rctl_test_acquire; 2814 } 2815 2816 ret &= ~RCT_LK_ABANDONED; 2817 2818 if ((ret & RCT_DENY) == RCT_DENY || 2819 lrctl->rc_cursor->rcv_next == NULL) { 2820 ret |= RCT_DENY; 2821 break; 2822 } 2823 2824 lrctl->rc_cursor = lrctl->rc_cursor->rcv_next; 2825 RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry, 2826 p, lrctl->rc_cursor->rcv_value)); 2827 } 2828 2829 mutex_exit(&rset->rcs_lock); 2830 2831 return (ret); 2832 } 2833 2834 /* 2835 * void rctl_init(void) 2836 * 2837 * Overview 2838 * Initialize the rctl subsystem, including the primoridal rctls 2839 * provided by the system. New subsystem-specific rctls should _not_ be 2840 * initialized here. (Do it in your own file.) 2841 * 2842 * Return values 2843 * None. 2844 * 2845 * Caller's context 2846 * Safe for KM_SLEEP allocations. Must be called prior to any process model 2847 * initialization. 
2848 */ 2849 void 2850 rctl_init(void) 2851 { 2852 rctl_cache = kmem_cache_create("rctl_cache", sizeof (rctl_t), 2853 0, NULL, NULL, NULL, NULL, NULL, 0); 2854 rctl_val_cache = kmem_cache_create("rctl_val_cache", 2855 sizeof (rctl_val_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 2856 2857 rctl_dict = mod_hash_create_extended("rctl_dict", 2858 rctl_dict_size, mod_hash_null_keydtor, rctl_dict_val_dtor, 2859 rctl_dict_hash_by_id, NULL, rctl_dict_id_cmp, KM_SLEEP); 2860 rctl_dict_by_name = mod_hash_create_strhash( 2861 "rctl_handles_by_name", rctl_dict_size, 2862 mod_hash_null_valdtor); 2863 rctl_ids = id_space_create("rctl_ids", 1, max_rctl_hndl); 2864 bzero(rctl_lists, (RC_MAX_ENTITY + 1) * sizeof (rctl_dict_entry_t *)); 2865 2866 rctlproc_init(); 2867 } 2868 2869 /* 2870 * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2871 * int chargeproc) 2872 * 2873 * Increments the amount of locked memory on a project, and 2874 * zone. If proj is non-NULL the project must be held by the 2875 * caller; if it is NULL the proj and zone of proc_t p are used. 2876 * If chargeproc is non-zero, then the charged amount is cached 2877 * on p->p_locked_mem so that the charge can be migrated when a 2878 * process changes projects. 2879 * 2880 * Return values 2881 * 0 - success 2882 * EAGAIN - attempting to increment locked memory is denied by one 2883 * or more resource entities. 
2884 */ 2885 int 2886 rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2887 int chargeproc) 2888 { 2889 kproject_t *projp; 2890 zone_t *zonep; 2891 rctl_entity_p_t e; 2892 int ret = 0; 2893 2894 ASSERT(p != NULL); 2895 ASSERT(MUTEX_HELD(&p->p_lock)); 2896 if (proj != NULL) { 2897 projp = proj; 2898 zonep = proj->kpj_zone; 2899 } else { 2900 projp = p->p_task->tk_proj; 2901 zonep = p->p_zone; 2902 } 2903 2904 mutex_enter(&zonep->zone_mem_lock); 2905 2906 e.rcep_p.proj = projp; 2907 e.rcep_t = RCENTITY_PROJECT; 2908 2909 /* check for overflow */ 2910 if ((projp->kpj_data.kpd_locked_mem + inc) < 2911 projp->kpj_data.kpd_locked_mem) { 2912 ret = EAGAIN; 2913 goto out; 2914 } 2915 if (projp->kpj_data.kpd_locked_mem + inc > 2916 projp->kpj_data.kpd_locked_mem_ctl) { 2917 if (rctl_test_entity(rc_project_locked_mem, projp->kpj_rctls, 2918 p, &e, inc, 0) & RCT_DENY) { 2919 ret = EAGAIN; 2920 goto out; 2921 } 2922 } 2923 e.rcep_p.zone = zonep; 2924 e.rcep_t = RCENTITY_ZONE; 2925 2926 /* Check for overflow */ 2927 if ((zonep->zone_locked_mem + inc) < zonep->zone_locked_mem) { 2928 ret = EAGAIN; 2929 goto out; 2930 } 2931 if (zonep->zone_locked_mem + inc > zonep->zone_locked_mem_ctl) { 2932 if (rctl_test_entity(rc_zone_locked_mem, zonep->zone_rctls, 2933 p, &e, inc, 0) & RCT_DENY) { 2934 ret = EAGAIN; 2935 goto out; 2936 } 2937 } 2938 2939 zonep->zone_locked_mem += inc; 2940 projp->kpj_data.kpd_locked_mem += inc; 2941 if (chargeproc != 0) { 2942 p->p_locked_mem += inc; 2943 } 2944 out: 2945 mutex_exit(&zonep->zone_mem_lock); 2946 return (ret); 2947 } 2948 2949 /* 2950 * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2951 * int creditproc) 2952 * 2953 * Decrements the amount of locked memory on a project and 2954 * zone. If proj is non-NULL the project must be held by the 2955 * caller; if it is NULL the proj and zone of proc_t p are used. 
2956 * If creditproc is non-zero, then the quantity of locked memory 2957 * is subtracted from p->p_locked_mem. 2958 * 2959 * Return values 2960 * none 2961 */ 2962 void 2963 rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc, 2964 int creditproc) 2965 { 2966 kproject_t *projp; 2967 zone_t *zonep; 2968 2969 if (proj != NULL) { 2970 projp = proj; 2971 zonep = proj->kpj_zone; 2972 } else { 2973 ASSERT(p != NULL); 2974 ASSERT(MUTEX_HELD(&p->p_lock)); 2975 projp = p->p_task->tk_proj; 2976 zonep = p->p_zone; 2977 } 2978 2979 mutex_enter(&zonep->zone_mem_lock); 2980 zonep->zone_locked_mem -= inc; 2981 projp->kpj_data.kpd_locked_mem -= inc; 2982 if (creditproc != 0) { 2983 ASSERT(p != NULL); 2984 ASSERT(MUTEX_HELD(&p->p_lock)); 2985 p->p_locked_mem -= inc; 2986 } 2987 mutex_exit(&zonep->zone_mem_lock); 2988 } 2989 2990 /* 2991 * rctl_incr_swap(proc_t *, zone_t *, size_t) 2992 * 2993 * Overview 2994 * Increments the swap charge on the specified zone. 2995 * 2996 * Return values 2997 * 0 on success. EAGAIN if swap increment fails due an rctl value 2998 * on the zone. 2999 * 3000 * Callers context 3001 * p_lock held on specified proc. 
3002 * swap must be even multiple of PAGESIZE 3003 */ 3004 int 3005 rctl_incr_swap(proc_t *proc, zone_t *zone, size_t swap) 3006 { 3007 rctl_entity_p_t e; 3008 3009 ASSERT(MUTEX_HELD(&proc->p_lock)); 3010 ASSERT((swap & PAGEOFFSET) == 0); 3011 e.rcep_p.zone = zone; 3012 e.rcep_t = RCENTITY_ZONE; 3013 3014 mutex_enter(&zone->zone_mem_lock); 3015 3016 /* Check for overflow */ 3017 if ((zone->zone_max_swap + swap) < zone->zone_max_swap) { 3018 mutex_exit(&zone->zone_mem_lock); 3019 return (EAGAIN); 3020 } 3021 if ((zone->zone_max_swap + swap) > 3022 zone->zone_max_swap_ctl) { 3023 3024 if (rctl_test_entity(rc_zone_max_swap, zone->zone_rctls, 3025 proc, &e, swap, 0) & RCT_DENY) { 3026 mutex_exit(&zone->zone_mem_lock); 3027 return (EAGAIN); 3028 } 3029 } 3030 zone->zone_max_swap += swap; 3031 mutex_exit(&zone->zone_mem_lock); 3032 return (0); 3033 } 3034 3035 /* 3036 * rctl_decr_swap(zone_t *, size_t) 3037 * 3038 * Overview 3039 * Decrements the swap charge on the specified zone. 3040 * 3041 * Return values 3042 * None 3043 * 3044 * Callers context 3045 * swap must be even multiple of PAGESIZE 3046 */ 3047 void 3048 rctl_decr_swap(zone_t *zone, size_t swap) 3049 { 3050 ASSERT((swap & PAGEOFFSET) == 0); 3051 mutex_enter(&zone->zone_mem_lock); 3052 ASSERT(zone->zone_max_swap >= swap); 3053 zone->zone_max_swap -= swap; 3054 mutex_exit(&zone->zone_mem_lock); 3055 } 3056 3057 /* 3058 * rctl_incr_lofi(proc_t *, zone_t *, size_t) 3059 * 3060 * Overview 3061 * Increments the number of lofi devices for the zone. 3062 * 3063 * Return values 3064 * 0 on success. EAGAIN if increment fails due an rctl value 3065 * on the zone. 3066 * 3067 * Callers context 3068 * p_lock held on specified proc. 
3069 */ 3070 int 3071 rctl_incr_lofi(proc_t *proc, zone_t *zone, size_t incr) 3072 { 3073 rctl_entity_p_t e; 3074 3075 ASSERT(MUTEX_HELD(&proc->p_lock)); 3076 ASSERT(incr > 0); 3077 3078 e.rcep_p.zone = zone; 3079 e.rcep_t = RCENTITY_ZONE; 3080 3081 mutex_enter(&zone->zone_rctl_lock); 3082 3083 /* Check for overflow */ 3084 if ((zone->zone_max_lofi + incr) < zone->zone_max_lofi) { 3085 mutex_exit(&zone->zone_rctl_lock); 3086 return (EAGAIN); 3087 } 3088 if ((zone->zone_max_lofi + incr) > zone->zone_max_lofi_ctl) { 3089 if (rctl_test_entity(rc_zone_max_lofi, zone->zone_rctls, 3090 proc, &e, incr, 0) & RCT_DENY) { 3091 mutex_exit(&zone->zone_rctl_lock); 3092 return (EAGAIN); 3093 } 3094 } 3095 zone->zone_max_lofi += incr; 3096 mutex_exit(&zone->zone_rctl_lock); 3097 return (0); 3098 } 3099 3100 /* 3101 * rctl_decr_lofi(zone_t *, size_t) 3102 * 3103 * Overview 3104 * Decrements the number of lofi devices for the zone. 3105 */ 3106 void 3107 rctl_decr_lofi(zone_t *zone, size_t decr) 3108 { 3109 mutex_enter(&zone->zone_rctl_lock); 3110 ASSERT(zone->zone_max_lofi >= decr); 3111 zone->zone_max_lofi -= decr; 3112 mutex_exit(&zone->zone_rctl_lock); 3113 } 3114 3115 /* 3116 * Create resource kstat 3117 */ 3118 static kstat_t * 3119 rctl_kstat_create_common(char *ks_name, int ks_instance, char *ks_class, 3120 uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags, int ks_zoneid) 3121 { 3122 kstat_t *ksp = NULL; 3123 char name[KSTAT_STRLEN]; 3124 3125 (void) snprintf(name, KSTAT_STRLEN, "%s_%d", ks_name, ks_instance); 3126 3127 if ((ksp = kstat_create_zone("caps", ks_zoneid, 3128 name, ks_class, ks_type, 3129 ks_ndata, ks_flags, ks_zoneid)) != NULL) { 3130 if (ks_zoneid != GLOBAL_ZONEID) 3131 kstat_zone_add(ksp, GLOBAL_ZONEID); 3132 } 3133 return (ksp); 3134 } 3135 3136 /* 3137 * Create zone-specific resource kstat 3138 */ 3139 kstat_t * 3140 rctl_kstat_create_zone(zone_t *zone, char *ks_name, uchar_t ks_type, 3141 uint_t ks_ndata, uchar_t ks_flags) 3142 { 3143 char 
name[KSTAT_STRLEN]; 3144 3145 (void) snprintf(name, KSTAT_STRLEN, "%s_zone", ks_name); 3146 3147 return (rctl_kstat_create_common(name, zone->zone_id, "zone_caps", 3148 ks_type, ks_ndata, ks_flags, zone->zone_id)); 3149 } 3150 3151 /* 3152 * Create project-specific resource kstat 3153 */ 3154 kstat_t * 3155 rctl_kstat_create_project(kproject_t *kpj, char *ks_name, uchar_t ks_type, 3156 uint_t ks_ndata, uchar_t ks_flags) 3157 { 3158 char name[KSTAT_STRLEN]; 3159 3160 (void) snprintf(name, KSTAT_STRLEN, "%s_project", ks_name); 3161 3162 return (rctl_kstat_create_common(name, kpj->kpj_id, "project_caps", 3163 ks_type, ks_ndata, ks_flags, kpj->kpj_zoneid)); 3164 } 3165 3166 /* 3167 * Create task-specific resource kstat 3168 */ 3169 kstat_t * 3170 rctl_kstat_create_task(task_t *tk, char *ks_name, uchar_t ks_type, 3171 uint_t ks_ndata, uchar_t ks_flags) 3172 { 3173 char name[KSTAT_STRLEN]; 3174 3175 (void) snprintf(name, KSTAT_STRLEN, "%s_task", ks_name); 3176 3177 return (rctl_kstat_create_common(name, tk->tk_tkid, "task_caps", 3178 ks_type, ks_ndata, ks_flags, tk->tk_proj->kpj_zoneid)); 3179 } 3180