/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Contracts
 * ---------
 *
 * Contracts are a primitive which enriches the relationships between
 * processes and system resources. The primary purpose of contracts is
 * to provide a means for the system to negotiate the departure from a
 * binding relationship (e.g. pages locked in memory or a thread bound
 * to a processor), but they can also be used as a purely asynchronous
 * error reporting mechanism as they are with process contracts.
 *
 * More information on how one interfaces with contracts and what
 * contracts can do for you can be found in:
 *   PSARC 2003/193 Solaris Contracts
 *   PSARC 2004/460 Contracts addendum
 *
 * This file contains the core contracts framework. By itself it is
 * useless: it depends on the contracts filesystem (ctfs) to provide an
 * interface to user processes and individual contract types to
 * implement the process/resource relationships.
 *
 * Data structure overview
 * -----------------------
 *
 * A contract is represented by a contract_t, which itself points to an
 * encapsulating contract-type specific contract object. A contract_t
 * contains the contract's static identity (including its terms), its
 * linkage to various bookkeeping structures, the contract-specific
 * event queue, and a reference count.
 *
 * A contract template is represented by a ct_template_t, which, like a
 * contract, points to an encapsulating contract-type specific template
 * object. A ct_template_t contains the template's terms.
 *
 * An event queue is represented by a ct_equeue_t, and consists of a
 * list of events, a list of listeners, and a list of listeners who are
 * waiting for new events (affectionately referred to as "tail
 * listeners"). There are three queue types, defined by ct_listnum_t
 * (an enum). An event may be on one of each type of queue
 * simultaneously; the list linkage used by a queue is determined by
 * its type.
 *
 * An event is represented by a ct_kevent_t, which contains mostly
 * static event data (e.g. id, payload). It also has an array of
 * ct_member_t structures, each of which contains a list_node_t and
 * represents the event's linkage in a specific event queue.
 *
 * Each open of an event endpoint results in the creation of a new
 * listener, represented by a ct_listener_t. In addition to linkage
 * into the aforementioned lists in the event_queue, a ct_listener_t
 * contains a pointer to the ct_kevent_t it is currently positioned at
 * as well as a set of status flags and other administrative data.
 *
 * Each process has a list of contracts it owns, p_ct_held; a pointer
 * to the process contract it is a member of, p_ct_process; the linkage
 * for that membership, p_ct_member; and an array of event queue
 * structures representing the process bundle queues.
 *
 * Each LWP has an array of its active templates, lwp_ct_active; and
 * the most recently created contracts, lwp_ct_latest.
 *
 * A process contract has a list of member processes and a list of
 * inherited contracts.
 *
 * There is a system-wide list of all contracts, as well as per-type
 * lists of contracts.
 *
 * Lock ordering overview
 * ----------------------
 *
 * Locks at the top are taken first:
 *
 *                   ct_evtlock
 *                   regent ct_lock
 *                   member ct_lock
 *                   pidlock
 *                   p_lock
 *    contract ctq_lock         contract_lock
 *    pbundle ctq_lock
 *    cte_lock
 *                   ct_reflock
 *
 * contract_lock and ctq_lock/cte_lock are not currently taken at the
 * same time.
 *
 * Reference counting and locking
 * ------------------------------
 *
 * A contract has a reference count, protected by ct_reflock.
 * (ct_reflock is also used in a couple other places where atomic
 * access to a variable is needed in an innermost context). A process
 * maintains a hold on each contract it owns. A process contract has a
 * hold on each contract it has inherited. Each event has a hold on
 * the contract which generated it. Process contract templates have
 * holds on the contracts referred to by their transfer terms. CTFS
 * contract directory nodes have holds on contracts. Lastly, various
 * code paths may temporarily take holds on contracts to prevent them
 * from disappearing while other processing is going on. It is
 * important to note that the global contract lists do not hold
 * references on contracts; a contract is removed from these structures
 * atomically with the release of its last reference.
 *
 * At a given point in time, a contract can either be owned by a
 * process, inherited by a regent process contract, or orphaned. A
 * contract_t's owner and regent pointers, ct_owner and ct_regent, are
 * protected by its ct_lock.
 * The linkage in the holder's (holder =
 * owner or regent) list of contracts, ct_ctlist, is protected by
 * whatever lock protects the holder's data structure. In order for
 * these two directions to remain consistent, changing the holder of a
 * contract requires that both locks be held.
 *
 * Events also have reference counts. There is one hold on an event
 * per queue it is present on, in addition to those needed for the
 * usual sundry reasons. Individual listeners are associated with
 * specific queues, and increase a queue-specific reference count
 * stored in the ct_member_t structure.
 *
 * The dynamic contents of an event (reference count and flags) are
 * protected by its cte_lock, while the contents of the embedded
 * ct_member_t structures are protected by the locks of the queues they
 * are linked into. A ct_listener_t's contents are also protected by
 * its event queue's ctq_lock.
 *
 * Resource controls
 * -----------------
 *
 * Control:      project.max-contracts (rc_project_contract)
 * Description:  Maximum number of contracts allowed per project.
 *
 *   When a contract is created, the project's allocation is tested and
 *   (assuming success) increased. When the last reference to a
 *   contract is released, the creating project's allocation is
 *   decreased.
159 */ 160 161 #include <sys/mutex.h> 162 #include <sys/debug.h> 163 #include <sys/types.h> 164 #include <sys/param.h> 165 #include <sys/kmem.h> 166 #include <sys/thread.h> 167 #include <sys/id_space.h> 168 #include <sys/avl.h> 169 #include <sys/list.h> 170 #include <sys/sysmacros.h> 171 #include <sys/proc.h> 172 #include <sys/ctfs.h> 173 #include <sys/contract_impl.h> 174 #include <sys/contract/process_impl.h> 175 #include <sys/dditypes.h> 176 #include <sys/contract/device_impl.h> 177 #include <sys/systm.h> 178 #include <sys/atomic.h> 179 #include <sys/cmn_err.h> 180 #include <sys/model.h> 181 #include <sys/policy.h> 182 #include <sys/zone.h> 183 #include <sys/task.h> 184 #include <sys/ddi.h> 185 #include <sys/sunddi.h> 186 187 extern rctl_hndl_t rc_project_contract; 188 189 static id_space_t *contract_ids; 190 static avl_tree_t contract_avl; 191 static kmutex_t contract_lock; 192 193 int ct_ntypes = CTT_MAXTYPE; 194 static ct_type_t *ct_types_static[CTT_MAXTYPE]; 195 ct_type_t **ct_types = ct_types_static; 196 int ct_debug; 197 198 static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int); 199 static void cte_queue_destroy(ct_equeue_t *); 200 static void cte_queue_drain(ct_equeue_t *, int); 201 static void cte_trim(ct_equeue_t *, contract_t *); 202 static void cte_copy(ct_equeue_t *, ct_equeue_t *); 203 204 /* 205 * contract_compar 206 * 207 * A contract comparator which sorts on contract ID. 208 */ 209 int 210 contract_compar(const void *x, const void *y) 211 { 212 const contract_t *ct1 = x; 213 const contract_t *ct2 = y; 214 215 if (ct1->ct_id < ct2->ct_id) 216 return (-1); 217 if (ct1->ct_id > ct2->ct_id) 218 return (1); 219 return (0); 220 } 221 222 /* 223 * contract_init 224 * 225 * Initializes the contract subsystem, the specific contract types, and 226 * process 0. 227 */ 228 void 229 contract_init(void) 230 { 231 /* 232 * Initialize contract subsystem. 
233 */ 234 contract_ids = id_space_create("contracts", 1, INT_MAX); 235 avl_create(&contract_avl, contract_compar, sizeof (contract_t), 236 offsetof(contract_t, ct_ctavl)); 237 mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL); 238 239 /* 240 * Initialize contract types. 241 */ 242 contract_process_init(); 243 contract_device_init(); 244 245 /* 246 * Initialize p0/lwp0 contract state. 247 */ 248 avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t), 249 offsetof(contract_t, ct_ctlist)); 250 } 251 252 /* 253 * contract_dtor 254 * 255 * Performs basic destruction of the common portions of a contract. 256 * Called from the failure path of contract_ctor and from 257 * contract_rele. 258 */ 259 static void 260 contract_dtor(contract_t *ct) 261 { 262 cte_queue_destroy(&ct->ct_events); 263 list_destroy(&ct->ct_vnodes); 264 mutex_destroy(&ct->ct_reflock); 265 mutex_destroy(&ct->ct_lock); 266 mutex_destroy(&ct->ct_evtlock); 267 } 268 269 /* 270 * contract_ctor 271 * 272 * Called by a contract type to initialize a contract. Fails if the 273 * max-contract resource control would have been exceeded. After a 274 * successful call to contract_ctor, the contract is unlocked and 275 * visible in all namespaces; any type-specific initialization should 276 * be completed before calling contract_ctor. Returns 0 on success. 277 * 278 * Because not all callers can tolerate failure, a 0 value for canfail 279 * instructs contract_ctor to ignore the project.max-contracts resource 280 * control. Obviously, this "out" should only be employed by callers 281 * who are sufficiently constrained in other ways (e.g. newproc). 
282 */ 283 int 284 contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data, 285 ctflags_t flags, proc_t *author, int canfail) 286 { 287 avl_index_t where; 288 klwp_t *curlwp = ttolwp(curthread); 289 290 ASSERT(author == curproc); 291 292 mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL); 293 mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL); 294 mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL); 295 ct->ct_id = id_alloc(contract_ids); 296 297 cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0); 298 list_create(&ct->ct_vnodes, sizeof (contract_vnode_t), 299 offsetof(contract_vnode_t, ctv_node)); 300 301 /* 302 * Instance data 303 */ 304 ct->ct_ref = 2; /* one for the holder, one for "latest" */ 305 ct->ct_cuid = crgetuid(CRED()); 306 ct->ct_type = type; 307 ct->ct_data = data; 308 gethrestime(&ct->ct_ctime); 309 ct->ct_state = CTS_OWNED; 310 ct->ct_flags = flags; 311 ct->ct_regent = author->p_ct_process ? 312 &author->p_ct_process->conp_contract : NULL; 313 ct->ct_ev_info = tmpl->ctmpl_ev_info; 314 ct->ct_ev_crit = tmpl->ctmpl_ev_crit; 315 ct->ct_cookie = tmpl->ctmpl_cookie; 316 ct->ct_owner = author; 317 ct->ct_ntime.ctm_total = -1; 318 ct->ct_qtime.ctm_total = -1; 319 ct->ct_nevent = NULL; 320 321 /* 322 * Test project.max-contracts. 323 */ 324 mutex_enter(&author->p_lock); 325 mutex_enter(&contract_lock); 326 if (canfail && rctl_test(rc_project_contract, 327 author->p_task->tk_proj->kpj_rctls, author, 1, 328 RCA_SAFE) & RCT_DENY) { 329 id_free(contract_ids, ct->ct_id); 330 mutex_exit(&contract_lock); 331 mutex_exit(&author->p_lock); 332 ct->ct_events.ctq_flags |= CTQ_DEAD; 333 contract_dtor(ct); 334 return (1); 335 } 336 ct->ct_proj = author->p_task->tk_proj; 337 ct->ct_proj->kpj_data.kpd_contract++; 338 (void) project_hold(ct->ct_proj); 339 mutex_exit(&contract_lock); 340 341 /* 342 * Insert into holder's avl of contracts. 
343 * We use an avl not because order is important, but because 344 * readdir of /proc/contracts requires we be able to use a 345 * scalar as an index into the process's list of contracts 346 */ 347 ct->ct_zoneid = author->p_zone->zone_id; 348 ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid; 349 VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL); 350 avl_insert(&author->p_ct_held, ct, where); 351 mutex_exit(&author->p_lock); 352 353 /* 354 * Insert into global contract AVL 355 */ 356 mutex_enter(&contract_lock); 357 VERIFY(avl_find(&contract_avl, ct, &where) == NULL); 358 avl_insert(&contract_avl, ct, where); 359 mutex_exit(&contract_lock); 360 361 /* 362 * Insert into type AVL 363 */ 364 mutex_enter(&type->ct_type_lock); 365 VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL); 366 avl_insert(&type->ct_type_avl, ct, where); 367 type->ct_type_timestruc = ct->ct_ctime; 368 mutex_exit(&type->ct_type_lock); 369 370 if (curlwp->lwp_ct_latest[type->ct_type_index]) 371 contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]); 372 curlwp->lwp_ct_latest[type->ct_type_index] = ct; 373 374 return (0); 375 } 376 377 /* 378 * contract_rele 379 * 380 * Releases a reference to a contract. If the caller had the last 381 * reference, the contract is removed from all namespaces, its 382 * allocation against the max-contracts resource control is released, 383 * and the contract type's free entry point is invoked for any 384 * type-specific deconstruction and to (presumably) free the object. 385 */ 386 void 387 contract_rele(contract_t *ct) 388 { 389 uint64_t nref; 390 391 mutex_enter(&ct->ct_reflock); 392 ASSERT(ct->ct_ref > 0); 393 nref = --ct->ct_ref; 394 mutex_exit(&ct->ct_reflock); 395 if (nref == 0) { 396 /* 397 * ct_owner is cleared when it drops its reference. 
398 */ 399 ASSERT(ct->ct_owner == NULL); 400 ASSERT(ct->ct_evcnt == 0); 401 402 /* 403 * Remove from global contract AVL 404 */ 405 mutex_enter(&contract_lock); 406 avl_remove(&contract_avl, ct); 407 mutex_exit(&contract_lock); 408 409 /* 410 * Remove from type AVL 411 */ 412 mutex_enter(&ct->ct_type->ct_type_lock); 413 avl_remove(&ct->ct_type->ct_type_avl, ct); 414 mutex_exit(&ct->ct_type->ct_type_lock); 415 416 /* 417 * Release the contract's ID 418 */ 419 id_free(contract_ids, ct->ct_id); 420 421 /* 422 * Release project hold 423 */ 424 mutex_enter(&contract_lock); 425 ct->ct_proj->kpj_data.kpd_contract--; 426 project_rele(ct->ct_proj); 427 mutex_exit(&contract_lock); 428 429 /* 430 * Free the contract 431 */ 432 contract_dtor(ct); 433 ct->ct_type->ct_type_ops->contop_free(ct); 434 } 435 } 436 437 /* 438 * contract_hold 439 * 440 * Adds a reference to a contract 441 */ 442 void 443 contract_hold(contract_t *ct) 444 { 445 mutex_enter(&ct->ct_reflock); 446 ASSERT(ct->ct_ref < UINT64_MAX); 447 ct->ct_ref++; 448 mutex_exit(&ct->ct_reflock); 449 } 450 451 /* 452 * contract_getzuniqid 453 * 454 * Get a contract's zone unique ID. Needed because 64-bit reads and 455 * writes aren't atomic on x86. Since there are contexts where we are 456 * unable to take ct_lock, we instead use ct_reflock; in actuality any 457 * lock would do. 458 */ 459 uint64_t 460 contract_getzuniqid(contract_t *ct) 461 { 462 uint64_t zuniqid; 463 464 mutex_enter(&ct->ct_reflock); 465 zuniqid = ct->ct_mzuniqid; 466 mutex_exit(&ct->ct_reflock); 467 468 return (zuniqid); 469 } 470 471 /* 472 * contract_setzuniqid 473 * 474 * Sets a contract's zone unique ID. See contract_getzuniqid. 475 */ 476 void 477 contract_setzuniqid(contract_t *ct, uint64_t zuniqid) 478 { 479 mutex_enter(&ct->ct_reflock); 480 ct->ct_mzuniqid = zuniqid; 481 mutex_exit(&ct->ct_reflock); 482 } 483 484 /* 485 * contract_abandon 486 * 487 * Abandons the specified contract. 
If "explicit" is clear, the 488 * contract was implicitly abandoned (by process exit) and should be 489 * inherited if its terms allow it and its owner was a member of a 490 * regent contract. Otherwise, the contract type's abandon entry point 491 * is invoked to either destroy or orphan the contract. 492 */ 493 int 494 contract_abandon(contract_t *ct, proc_t *p, int explicit) 495 { 496 ct_equeue_t *q = NULL; 497 contract_t *parent = &p->p_ct_process->conp_contract; 498 int inherit = 0; 499 500 VERIFY(p == curproc); 501 502 mutex_enter(&ct->ct_lock); 503 504 /* 505 * Multiple contract locks are taken contract -> subcontract. 506 * Check if the contract will be inherited so we can acquire 507 * all the necessary locks before making sensitive changes. 508 */ 509 if (!explicit && (ct->ct_flags & CTF_INHERIT) && 510 contract_process_accept(parent)) { 511 mutex_exit(&ct->ct_lock); 512 mutex_enter(&parent->ct_lock); 513 mutex_enter(&ct->ct_lock); 514 inherit = 1; 515 } 516 517 if (ct->ct_owner != p) { 518 mutex_exit(&ct->ct_lock); 519 if (inherit) 520 mutex_exit(&parent->ct_lock); 521 return (EINVAL); 522 } 523 524 mutex_enter(&p->p_lock); 525 if (explicit) 526 avl_remove(&p->p_ct_held, ct); 527 ct->ct_owner = NULL; 528 mutex_exit(&p->p_lock); 529 530 /* 531 * Since we can't call cte_trim with the contract lock held, 532 * we grab the queue pointer here. 533 */ 534 if (p->p_ct_equeue) 535 q = p->p_ct_equeue[ct->ct_type->ct_type_index]; 536 537 /* 538 * contop_abandon may destroy the contract so we rely on it to 539 * drop ct_lock. We retain a reference on the contract so that 540 * the cte_trim which follows functions properly. Even though 541 * cte_trim doesn't dereference the contract pointer, it is 542 * still necessary to retain a reference to the contract so 543 * that we don't trim events which are sent by a subsequently 544 * allocated contract infortuitously located at the same address. 
545 */ 546 contract_hold(ct); 547 548 if (inherit) { 549 ct->ct_state = CTS_INHERITED; 550 VERIFY(ct->ct_regent == parent); 551 contract_process_take(parent, ct); 552 553 /* 554 * We are handing off the process's reference to the 555 * parent contract. For this reason, the order in 556 * which we drop the contract locks is also important. 557 */ 558 mutex_exit(&ct->ct_lock); 559 mutex_exit(&parent->ct_lock); 560 } else { 561 ct->ct_regent = NULL; 562 ct->ct_type->ct_type_ops->contop_abandon(ct); 563 } 564 565 /* 566 * ct_lock has been dropped; we can safely trim the event 567 * queue now. 568 */ 569 if (q) { 570 mutex_enter(&q->ctq_lock); 571 cte_trim(q, ct); 572 mutex_exit(&q->ctq_lock); 573 } 574 575 contract_rele(ct); 576 577 return (0); 578 } 579 580 int 581 contract_newct(contract_t *ct) 582 { 583 return (ct->ct_type->ct_type_ops->contop_newct(ct)); 584 } 585 586 /* 587 * contract_adopt 588 * 589 * Adopts a contract. After a successful call to this routine, the 590 * previously inherited contract will belong to the calling process, 591 * and its events will have been appended to its new owner's process 592 * bundle queue. 593 */ 594 int 595 contract_adopt(contract_t *ct, proc_t *p) 596 { 597 avl_index_t where; 598 ct_equeue_t *q; 599 contract_t *parent; 600 601 ASSERT(p == curproc); 602 603 /* 604 * Ensure the process has an event queue. Checked by ASSERTs 605 * below. 606 */ 607 (void) contract_type_pbundle(ct->ct_type, p); 608 609 mutex_enter(&ct->ct_lock); 610 parent = ct->ct_regent; 611 if (ct->ct_state != CTS_INHERITED || 612 &p->p_ct_process->conp_contract != parent || 613 p->p_zone->zone_uniqid != ct->ct_czuniqid) { 614 mutex_exit(&ct->ct_lock); 615 return (EINVAL); 616 } 617 618 /* 619 * Multiple contract locks are taken contract -> subcontract. 
620 */ 621 mutex_exit(&ct->ct_lock); 622 mutex_enter(&parent->ct_lock); 623 mutex_enter(&ct->ct_lock); 624 625 /* 626 * It is possible that the contract was adopted by someone else 627 * while its lock was dropped. It isn't possible for the 628 * contract to have been inherited by a different regent 629 * contract. 630 */ 631 if (ct->ct_state != CTS_INHERITED) { 632 mutex_exit(&parent->ct_lock); 633 mutex_exit(&ct->ct_lock); 634 return (EBUSY); 635 } 636 ASSERT(ct->ct_regent == parent); 637 638 ct->ct_state = CTS_OWNED; 639 640 contract_process_adopt(ct, p); 641 642 mutex_enter(&p->p_lock); 643 ct->ct_owner = p; 644 VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL); 645 avl_insert(&p->p_ct_held, ct, where); 646 mutex_exit(&p->p_lock); 647 648 ASSERT(ct->ct_owner->p_ct_equeue); 649 ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]); 650 q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]; 651 cte_copy(&ct->ct_events, q); 652 mutex_exit(&ct->ct_lock); 653 654 return (0); 655 } 656 657 /* 658 * contract_ack 659 * 660 * Acknowledges receipt of a critical event. 661 */ 662 int 663 contract_ack(contract_t *ct, uint64_t evid, int ack) 664 { 665 ct_kevent_t *ev; 666 list_t *queue = &ct->ct_events.ctq_events; 667 int error = ESRCH; 668 int nego = 0; 669 uint_t evtype; 670 671 ASSERT(ack == CT_ACK || ack == CT_NACK); 672 673 mutex_enter(&ct->ct_lock); 674 mutex_enter(&ct->ct_events.ctq_lock); 675 /* 676 * We are probably ACKing something near the head of the queue. 
677 */ 678 for (ev = list_head(queue); ev; ev = list_next(queue, ev)) { 679 if (ev->cte_id == evid) { 680 if (ev->cte_flags & CTE_NEG) 681 nego = 1; 682 else if (ack == CT_NACK) 683 break; 684 if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) { 685 ev->cte_flags |= CTE_ACK; 686 ct->ct_evcnt--; 687 evtype = ev->cte_type; 688 error = 0; 689 } 690 break; 691 } 692 } 693 mutex_exit(&ct->ct_events.ctq_lock); 694 mutex_exit(&ct->ct_lock); 695 696 /* 697 * Not all critical events are negotiation events, however 698 * every negotiation event is a critical event. NEGEND events 699 * are critical events but are not negotiation events 700 */ 701 if (error || !nego) 702 return (error); 703 704 if (ack == CT_ACK) 705 error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid); 706 else 707 error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid); 708 709 return (error); 710 } 711 712 /*ARGSUSED*/ 713 int 714 contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid) 715 { 716 cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u", 717 ct->ct_id); 718 return (ENOSYS); 719 } 720 721 /*ARGSUSED*/ 722 int 723 contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid) 724 { 725 cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u", 726 ct->ct_id); 727 return (ENOSYS); 728 } 729 730 /*ARGSUSED*/ 731 int 732 contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid) 733 { 734 return (ERANGE); 735 } 736 737 /* 738 * contract_qack 739 * 740 * Asks that negotiations be extended by another time quantum 741 */ 742 int 743 contract_qack(contract_t *ct, uint64_t evid) 744 { 745 ct_kevent_t *ev; 746 list_t *queue = &ct->ct_events.ctq_events; 747 int nego = 0; 748 uint_t evtype; 749 750 mutex_enter(&ct->ct_lock); 751 mutex_enter(&ct->ct_events.ctq_lock); 752 753 for (ev = list_head(queue); ev; ev = list_next(queue, ev)) { 754 if (ev->cte_id == evid) { 755 if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) { 756 evtype = ev->cte_type; 
757 nego = 1; 758 } 759 break; 760 } 761 } 762 mutex_exit(&ct->ct_events.ctq_lock); 763 mutex_exit(&ct->ct_lock); 764 765 /* 766 * Only a negotiated event (which is by definition also a critical 767 * event) which has not yet been acknowledged can provide 768 * time quanta to a negotiating owner process. 769 */ 770 if (!nego) 771 return (ESRCH); 772 773 return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid)); 774 } 775 776 /* 777 * contract_orphan 778 * 779 * Icky-poo. This is a process-contract special, used to ACK all 780 * critical messages when a contract is orphaned. 781 */ 782 void 783 contract_orphan(contract_t *ct) 784 { 785 ct_kevent_t *ev; 786 list_t *queue = &ct->ct_events.ctq_events; 787 788 ASSERT(MUTEX_HELD(&ct->ct_lock)); 789 ASSERT(ct->ct_state != CTS_ORPHAN); 790 791 mutex_enter(&ct->ct_events.ctq_lock); 792 ct->ct_state = CTS_ORPHAN; 793 for (ev = list_head(queue); ev; ev = list_next(queue, ev)) { 794 if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) { 795 ev->cte_flags |= CTE_ACK; 796 ct->ct_evcnt--; 797 } 798 } 799 mutex_exit(&ct->ct_events.ctq_lock); 800 801 ASSERT(ct->ct_evcnt == 0); 802 } 803 804 /* 805 * contract_destroy 806 * 807 * Explicit contract destruction. Called when contract is empty. 808 * The contract will actually stick around until all of its events are 809 * removed from the bundle and and process bundle queues, and all fds 810 * which refer to it are closed. See contract_dtor if you are looking 811 * for what destroys the contract structure. 
812 */ 813 void 814 contract_destroy(contract_t *ct) 815 { 816 ASSERT(MUTEX_HELD(&ct->ct_lock)); 817 ASSERT(ct->ct_state != CTS_DEAD); 818 ASSERT(ct->ct_owner == NULL); 819 820 ct->ct_state = CTS_DEAD; 821 cte_queue_drain(&ct->ct_events, 1); 822 mutex_exit(&ct->ct_lock); 823 mutex_enter(&ct->ct_type->ct_type_events.ctq_lock); 824 cte_trim(&ct->ct_type->ct_type_events, ct); 825 mutex_exit(&ct->ct_type->ct_type_events.ctq_lock); 826 mutex_enter(&ct->ct_lock); 827 ct->ct_type->ct_type_ops->contop_destroy(ct); 828 mutex_exit(&ct->ct_lock); 829 contract_rele(ct); 830 } 831 832 /* 833 * contract_vnode_get 834 * 835 * Obtains the contract directory vnode for this contract, if there is 836 * one. The caller must VN_RELE the vnode when they are through using 837 * it. 838 */ 839 vnode_t * 840 contract_vnode_get(contract_t *ct, vfs_t *vfsp) 841 { 842 contract_vnode_t *ctv; 843 vnode_t *vp = NULL; 844 845 mutex_enter(&ct->ct_lock); 846 for (ctv = list_head(&ct->ct_vnodes); ctv != NULL; 847 ctv = list_next(&ct->ct_vnodes, ctv)) 848 if (ctv->ctv_vnode->v_vfsp == vfsp) { 849 vp = ctv->ctv_vnode; 850 VN_HOLD(vp); 851 break; 852 } 853 mutex_exit(&ct->ct_lock); 854 return (vp); 855 } 856 857 /* 858 * contract_vnode_set 859 * 860 * Sets the contract directory vnode for this contract. We don't hold 861 * a reference on the vnode because we don't want to prevent it from 862 * being freed. The vnode's inactive entry point will take care of 863 * notifying us when it should be removed. 864 */ 865 void 866 contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode) 867 { 868 mutex_enter(&ct->ct_lock); 869 ctv->ctv_vnode = vnode; 870 list_insert_head(&ct->ct_vnodes, ctv); 871 mutex_exit(&ct->ct_lock); 872 } 873 874 /* 875 * contract_vnode_clear 876 * 877 * Removes this vnode as the contract directory vnode for this 878 * contract. 
Called from a contract directory's inactive entry point, 879 * this may return 0 indicating that the vnode gained another reference 880 * because of a simultaneous call to contract_vnode_get. 881 */ 882 int 883 contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv) 884 { 885 vnode_t *vp = ctv->ctv_vnode; 886 int result; 887 888 mutex_enter(&ct->ct_lock); 889 mutex_enter(&vp->v_lock); 890 if (vp->v_count == 1) { 891 list_remove(&ct->ct_vnodes, ctv); 892 result = 1; 893 } else { 894 vp->v_count--; 895 result = 0; 896 } 897 mutex_exit(&vp->v_lock); 898 mutex_exit(&ct->ct_lock); 899 900 return (result); 901 } 902 903 /* 904 * contract_exit 905 * 906 * Abandons all contracts held by process p, and drains process p's 907 * bundle queues. Called on process exit. 908 */ 909 void 910 contract_exit(proc_t *p) 911 { 912 contract_t *ct; 913 void *cookie = NULL; 914 int i; 915 916 ASSERT(p == curproc); 917 918 /* 919 * Abandon held contracts. contract_abandon knows enough not 920 * to remove the contract from the list a second time. We are 921 * exiting, so no locks are needed here. But because 922 * contract_abandon will take p_lock, we need to make sure we 923 * aren't holding it. 924 */ 925 ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 926 while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL) 927 VERIFY(contract_abandon(ct, p, 0) == 0); 928 929 /* 930 * Drain pbundles. Because a process bundle queue could have 931 * been passed to another process, they may not be freed right 932 * away. 
933 */ 934 if (p->p_ct_equeue) { 935 for (i = 0; i < CTT_MAXTYPE; i++) 936 if (p->p_ct_equeue[i]) 937 cte_queue_drain(p->p_ct_equeue[i], 0); 938 kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *)); 939 } 940 } 941 942 static int 943 get_time_left(struct ct_time *t) 944 { 945 clock_t ticks_elapsed; 946 int secs_elapsed; 947 948 if (t->ctm_total == -1) 949 return (-1); 950 951 ticks_elapsed = ddi_get_lbolt() - t->ctm_start; 952 secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC); 953 return (secs_elapsed > 0 ? secs_elapsed : 0); 954 } 955 956 /* 957 * contract_status_common 958 * 959 * Populates a ct_status structure. Used by contract types in their 960 * status entry points and ctfs when only common information is 961 * requested. 962 */ 963 void 964 contract_status_common(contract_t *ct, zone_t *zone, void *status, 965 model_t model) 966 { 967 STRUCT_HANDLE(ct_status, lstatus); 968 969 STRUCT_SET_HANDLE(lstatus, model, status); 970 ASSERT(MUTEX_HELD(&ct->ct_lock)); 971 if (zone->zone_uniqid == GLOBAL_ZONEUNIQID || 972 zone->zone_uniqid == ct->ct_czuniqid) { 973 zone_t *czone; 974 zoneid_t zoneid = -1; 975 976 /* 977 * Contracts don't have holds on the zones they were 978 * created by. If the contract's zone no longer 979 * exists, we say its zoneid is -1. 980 */ 981 if (zone->zone_uniqid == ct->ct_czuniqid || 982 ct->ct_czuniqid == GLOBAL_ZONEUNIQID) { 983 zoneid = ct->ct_zoneid; 984 } else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) { 985 if (czone->zone_uniqid == ct->ct_mzuniqid) 986 zoneid = ct->ct_zoneid; 987 zone_rele(czone); 988 } 989 990 STRUCT_FSET(lstatus, ctst_zoneid, zoneid); 991 STRUCT_FSET(lstatus, ctst_holder, 992 (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid : 993 (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0); 994 STRUCT_FSET(lstatus, ctst_state, ct->ct_state); 995 } else { 996 /* 997 * We are looking at a contract which was created by a 998 * process outside of our zone. 
We provide fake zone, 999 * holder, and state information. 1000 */ 1001 1002 STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id); 1003 /* 1004 * Since "zone" can't disappear until the calling ctfs 1005 * is unmounted, zone_zsched must be valid. 1006 */ 1007 STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ? 1008 zone->zone_zsched->p_pid : 0); 1009 STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ? 1010 CTS_OWNED : ct->ct_state); 1011 } 1012 STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt); 1013 STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime)); 1014 STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime)); 1015 STRUCT_FSET(lstatus, ctst_nevid, 1016 ct->ct_nevent ? ct->ct_nevent->cte_id : 0); 1017 STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit); 1018 STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info); 1019 STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie); 1020 STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index); 1021 STRUCT_FSET(lstatus, ctst_id, ct->ct_id); 1022 } 1023 1024 /* 1025 * contract_checkcred 1026 * 1027 * Determines if the specified contract is owned by a process with the 1028 * same effective uid as the specified credential. The caller must 1029 * ensure that the uid spaces are the same. Returns 1 on success. 1030 */ 1031 static int 1032 contract_checkcred(contract_t *ct, const cred_t *cr) 1033 { 1034 proc_t *p; 1035 int fail = 1; 1036 1037 mutex_enter(&ct->ct_lock); 1038 if ((p = ct->ct_owner) != NULL) { 1039 mutex_enter(&p->p_crlock); 1040 fail = crgetuid(cr) != crgetuid(p->p_cred); 1041 mutex_exit(&p->p_crlock); 1042 } 1043 mutex_exit(&ct->ct_lock); 1044 1045 return (!fail); 1046 } 1047 1048 /* 1049 * contract_owned 1050 * 1051 * Determines if the specified credential can view an event generated 1052 * by the specified contract. If locked is set, the contract's ct_lock 1053 * is held and the caller will need to do additional work to determine 1054 * if they truly can see the event. 
Returns 1 on success.
 */
int
contract_owned(contract_t *ct, const cred_t *cr, int locked)
{
	int is_owner;
	int creator_ok = 0;
	int zone_ok = 0;
	uint64_t my_zone, eff_zone;
	uid_t euid;

	ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));

	my_zone = curproc->p_zone->zone_uniqid;
	eff_zone = contract_getzuniqid(ct);
	euid = crgetuid(cr);

	/* We own the contract. */
	is_owner = (ct->ct_owner == curproc);

	/*
	 * We are in the creator's (and holder's) zone and our uid
	 * matches the creator's or holder's.  The holder comparison
	 * needs ct_lock, so it is skipped when the caller already
	 * holds it.
	 */
	if (my_zone == ct->ct_czuniqid) {
		if (ct->ct_cuid == euid)
			creator_ok = 1;
		else if (!locked && contract_checkcred(ct, cr))
			creator_ok = 1;
	}

	/*
	 * We are in the effective zone of a contract created in the
	 * global zone, and our uid matches that of the virtualized
	 * holder (zsched/kcred).
	 */
	if (ct->ct_czuniqid != eff_zone && my_zone == eff_zone &&
	    crgetuid(kcred) == euid)
		zone_ok = 1;

	return (is_owner || creator_ok || zone_ok);
}


/*
 * contract_type_init
 *
 * Called by contract types to register themselves with the contracts
 * framework.
1092 */ 1093 ct_type_t * 1094 contract_type_init(ct_typeid_t type, const char *name, contops_t *ops, 1095 ct_f_default_t *dfault) 1096 { 1097 ct_type_t *result; 1098 1099 ASSERT(type < CTT_MAXTYPE); 1100 1101 result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP); 1102 1103 mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL); 1104 avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t), 1105 offsetof(contract_t, ct_cttavl)); 1106 cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0); 1107 result->ct_type_name = name; 1108 result->ct_type_ops = ops; 1109 result->ct_type_default = dfault; 1110 result->ct_type_evid = 0; 1111 gethrestime(&result->ct_type_timestruc); 1112 result->ct_type_index = type; 1113 1114 ct_types[type] = result; 1115 1116 return (result); 1117 } 1118 1119 /* 1120 * contract_type_count 1121 * 1122 * Obtains the number of contracts of a particular type. 1123 */ 1124 int 1125 contract_type_count(ct_type_t *type) 1126 { 1127 ulong_t count; 1128 1129 mutex_enter(&type->ct_type_lock); 1130 count = avl_numnodes(&type->ct_type_avl); 1131 mutex_exit(&type->ct_type_lock); 1132 1133 return (count); 1134 } 1135 1136 /* 1137 * contract_type_max 1138 * 1139 * Obtains the maximum contract id of of a particular type. 1140 */ 1141 ctid_t 1142 contract_type_max(ct_type_t *type) 1143 { 1144 contract_t *ct; 1145 ctid_t res; 1146 1147 mutex_enter(&type->ct_type_lock); 1148 ct = avl_last(&type->ct_type_avl); 1149 res = ct ? ct->ct_id : -1; 1150 mutex_exit(&type->ct_type_lock); 1151 1152 return (res); 1153 } 1154 1155 /* 1156 * contract_max 1157 * 1158 * Obtains the maximum contract id. 1159 */ 1160 ctid_t 1161 contract_max(void) 1162 { 1163 contract_t *ct; 1164 ctid_t res; 1165 1166 mutex_enter(&contract_lock); 1167 ct = avl_last(&contract_avl); 1168 res = ct ? 
ct->ct_id : -1; 1169 mutex_exit(&contract_lock); 1170 1171 return (res); 1172 } 1173 1174 /* 1175 * contract_lookup_common 1176 * 1177 * Common code for contract_lookup and contract_type_lookup. Takes a 1178 * pointer to an AVL tree to search in. Should be called with the 1179 * appropriate tree-protecting lock held (unfortunately unassertable). 1180 */ 1181 static ctid_t 1182 contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current) 1183 { 1184 contract_t template, *ct; 1185 avl_index_t where; 1186 ctid_t res; 1187 1188 template.ct_id = current; 1189 ct = avl_find(tree, &template, &where); 1190 if (ct == NULL) 1191 ct = avl_nearest(tree, where, AVL_AFTER); 1192 if (zuniqid != GLOBAL_ZONEUNIQID) 1193 while (ct && (contract_getzuniqid(ct) != zuniqid)) 1194 ct = AVL_NEXT(tree, ct); 1195 res = ct ? ct->ct_id : -1; 1196 1197 return (res); 1198 } 1199 1200 /* 1201 * contract_type_lookup 1202 * 1203 * Returns the next type contract after the specified id, visible from 1204 * the specified zone. 1205 */ 1206 ctid_t 1207 contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current) 1208 { 1209 ctid_t res; 1210 1211 mutex_enter(&type->ct_type_lock); 1212 res = contract_lookup_common(&type->ct_type_avl, zuniqid, current); 1213 mutex_exit(&type->ct_type_lock); 1214 1215 return (res); 1216 } 1217 1218 /* 1219 * contract_lookup 1220 * 1221 * Returns the next contract after the specified id, visible from the 1222 * specified zone. 1223 */ 1224 ctid_t 1225 contract_lookup(uint64_t zuniqid, ctid_t current) 1226 { 1227 ctid_t res; 1228 1229 mutex_enter(&contract_lock); 1230 res = contract_lookup_common(&contract_avl, zuniqid, current); 1231 mutex_exit(&contract_lock); 1232 1233 return (res); 1234 } 1235 1236 /* 1237 * contract_plookup 1238 * 1239 * Returns the next contract held by process p after the specified id, 1240 * visible from the specified zone. 
Made complicated by the fact that 1241 * contracts visible in a zone but held by processes outside of the 1242 * zone need to appear as being held by zsched to zone members. 1243 */ 1244 ctid_t 1245 contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid) 1246 { 1247 contract_t template, *ct; 1248 avl_index_t where; 1249 ctid_t res; 1250 1251 template.ct_id = current; 1252 if (zuniqid != GLOBAL_ZONEUNIQID && 1253 (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) { 1254 /* This is inelegant. */ 1255 mutex_enter(&contract_lock); 1256 ct = avl_find(&contract_avl, &template, &where); 1257 if (ct == NULL) 1258 ct = avl_nearest(&contract_avl, where, AVL_AFTER); 1259 while (ct && !(ct->ct_state < CTS_ORPHAN && 1260 contract_getzuniqid(ct) == zuniqid && 1261 ct->ct_czuniqid == GLOBAL_ZONEUNIQID)) 1262 ct = AVL_NEXT(&contract_avl, ct); 1263 res = ct ? ct->ct_id : -1; 1264 mutex_exit(&contract_lock); 1265 } else { 1266 mutex_enter(&p->p_lock); 1267 ct = avl_find(&p->p_ct_held, &template, &where); 1268 if (ct == NULL) 1269 ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER); 1270 res = ct ? ct->ct_id : -1; 1271 mutex_exit(&p->p_lock); 1272 } 1273 1274 return (res); 1275 } 1276 1277 /* 1278 * contract_ptr_common 1279 * 1280 * Common code for contract_ptr and contract_type_ptr. Takes a pointer 1281 * to an AVL tree to search in. Should be called with the appropriate 1282 * tree-protecting lock held (unfortunately unassertable). 1283 */ 1284 static contract_t * 1285 contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid) 1286 { 1287 contract_t template, *ct; 1288 1289 template.ct_id = id; 1290 ct = avl_find(tree, &template, NULL); 1291 if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID && 1292 contract_getzuniqid(ct) != zuniqid)) { 1293 return (NULL); 1294 } 1295 1296 /* 1297 * Check to see if a thread is in the window in contract_rele 1298 * between dropping the reference count and removing the 1299 * contract from the type AVL. 
1300 */ 1301 mutex_enter(&ct->ct_reflock); 1302 if (ct->ct_ref) { 1303 ct->ct_ref++; 1304 mutex_exit(&ct->ct_reflock); 1305 } else { 1306 mutex_exit(&ct->ct_reflock); 1307 ct = NULL; 1308 } 1309 1310 return (ct); 1311 } 1312 1313 /* 1314 * contract_type_ptr 1315 * 1316 * Returns a pointer to the contract with the specified id. The 1317 * contract is held, so the caller needs to release the reference when 1318 * it is through with the contract. 1319 */ 1320 contract_t * 1321 contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid) 1322 { 1323 contract_t *ct; 1324 1325 mutex_enter(&type->ct_type_lock); 1326 ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid); 1327 mutex_exit(&type->ct_type_lock); 1328 1329 return (ct); 1330 } 1331 1332 /* 1333 * contract_ptr 1334 * 1335 * Returns a pointer to the contract with the specified id. The 1336 * contract is held, so the caller needs to release the reference when 1337 * it is through with the contract. 1338 */ 1339 contract_t * 1340 contract_ptr(ctid_t id, uint64_t zuniqid) 1341 { 1342 contract_t *ct; 1343 1344 mutex_enter(&contract_lock); 1345 ct = contract_ptr_common(&contract_avl, id, zuniqid); 1346 mutex_exit(&contract_lock); 1347 1348 return (ct); 1349 } 1350 1351 /* 1352 * contract_type_time 1353 * 1354 * Obtains the last time a contract of a particular type was created. 1355 */ 1356 void 1357 contract_type_time(ct_type_t *type, timestruc_t *time) 1358 { 1359 mutex_enter(&type->ct_type_lock); 1360 *time = type->ct_type_timestruc; 1361 mutex_exit(&type->ct_type_lock); 1362 } 1363 1364 /* 1365 * contract_type_bundle 1366 * 1367 * Obtains a type's bundle queue. 1368 */ 1369 ct_equeue_t * 1370 contract_type_bundle(ct_type_t *type) 1371 { 1372 return (&type->ct_type_events); 1373 } 1374 1375 /* 1376 * contract_type_pbundle 1377 * 1378 * Obtain's a process's bundle queue. If one doesn't exist, one is 1379 * created. Often used simply to ensure that a bundle queue is 1380 * allocated. 
1381 */ 1382 ct_equeue_t * 1383 contract_type_pbundle(ct_type_t *type, proc_t *pp) 1384 { 1385 /* 1386 * If there isn't an array of bundle queues, allocate one. 1387 */ 1388 if (pp->p_ct_equeue == NULL) { 1389 size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *); 1390 ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP); 1391 1392 mutex_enter(&pp->p_lock); 1393 if (pp->p_ct_equeue) 1394 kmem_free(qa, size); 1395 else 1396 pp->p_ct_equeue = qa; 1397 mutex_exit(&pp->p_lock); 1398 } 1399 1400 /* 1401 * If there isn't a bundle queue of the required type, allocate 1402 * one. 1403 */ 1404 if (pp->p_ct_equeue[type->ct_type_index] == NULL) { 1405 ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP); 1406 cte_queue_create(q, CTEL_PBUNDLE, 20, 1); 1407 1408 mutex_enter(&pp->p_lock); 1409 if (pp->p_ct_equeue[type->ct_type_index]) 1410 cte_queue_drain(q, 0); 1411 else 1412 pp->p_ct_equeue[type->ct_type_index] = q; 1413 mutex_exit(&pp->p_lock); 1414 } 1415 1416 return (pp->p_ct_equeue[type->ct_type_index]); 1417 } 1418 1419 /* 1420 * ctparam_copyin 1421 * 1422 * copyin a ct_param_t for CT_TSET or CT_TGET commands. 1423 * If ctparam_copyout() is not called after ctparam_copyin(), then 1424 * the caller must kmem_free() the buffer pointed by kparam->ctpm_kbuf. 1425 * 1426 * The copyin/out of ct_param_t is not done in ctmpl_set() and ctmpl_get() 1427 * because prctioctl() calls ctmpl_set() and ctmpl_get() while holding a 1428 * process lock. 
1429 */ 1430 int 1431 ctparam_copyin(const void *uaddr, ct_kparam_t *kparam, int flag, int cmd) 1432 { 1433 uint32_t size; 1434 void *ubuf; 1435 ct_param_t *param = &kparam->param; 1436 STRUCT_DECL(ct_param, uarg); 1437 1438 STRUCT_INIT(uarg, flag); 1439 if (copyin(uaddr, STRUCT_BUF(uarg), STRUCT_SIZE(uarg))) 1440 return (EFAULT); 1441 size = STRUCT_FGET(uarg, ctpm_size); 1442 ubuf = STRUCT_FGETP(uarg, ctpm_value); 1443 1444 if (size > CT_PARAM_MAX_SIZE || size == 0) 1445 return (EINVAL); 1446 1447 kparam->ctpm_kbuf = kmem_alloc(size, KM_SLEEP); 1448 if (cmd == CT_TSET) { 1449 if (copyin(ubuf, kparam->ctpm_kbuf, size)) { 1450 kmem_free(kparam->ctpm_kbuf, size); 1451 return (EFAULT); 1452 } 1453 } 1454 param->ctpm_id = STRUCT_FGET(uarg, ctpm_id); 1455 param->ctpm_size = size; 1456 param->ctpm_value = ubuf; 1457 kparam->ret_size = 0; 1458 1459 return (0); 1460 } 1461 1462 /* 1463 * ctparam_copyout 1464 * 1465 * copyout a ct_kparam_t and frees the buffer pointed by the member 1466 * ctpm_kbuf of ct_kparam_t 1467 */ 1468 int 1469 ctparam_copyout(ct_kparam_t *kparam, void *uaddr, int flag) 1470 { 1471 int r = 0; 1472 ct_param_t *param = &kparam->param; 1473 STRUCT_DECL(ct_param, uarg); 1474 1475 STRUCT_INIT(uarg, flag); 1476 1477 STRUCT_FSET(uarg, ctpm_id, param->ctpm_id); 1478 STRUCT_FSET(uarg, ctpm_size, kparam->ret_size); 1479 STRUCT_FSETP(uarg, ctpm_value, param->ctpm_value); 1480 if (copyout(STRUCT_BUF(uarg), uaddr, STRUCT_SIZE(uarg))) { 1481 r = EFAULT; 1482 goto error; 1483 } 1484 if (copyout(kparam->ctpm_kbuf, param->ctpm_value, 1485 MIN(kparam->ret_size, param->ctpm_size))) { 1486 r = EFAULT; 1487 } 1488 1489 error: 1490 kmem_free(kparam->ctpm_kbuf, param->ctpm_size); 1491 1492 return (r); 1493 } 1494 1495 /* 1496 * ctmpl_free 1497 * 1498 * Frees a template. 
1499 */ 1500 void 1501 ctmpl_free(ct_template_t *template) 1502 { 1503 mutex_destroy(&template->ctmpl_lock); 1504 template->ctmpl_ops->ctop_free(template); 1505 } 1506 1507 /* 1508 * ctmpl_dup 1509 * 1510 * Creates a copy of a template. 1511 */ 1512 ct_template_t * 1513 ctmpl_dup(ct_template_t *template) 1514 { 1515 ct_template_t *new; 1516 1517 if (template == NULL) 1518 return (NULL); 1519 1520 new = template->ctmpl_ops->ctop_dup(template); 1521 /* 1522 * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and 1523 * should have remain held until now. 1524 */ 1525 mutex_exit(&template->ctmpl_lock); 1526 1527 return (new); 1528 } 1529 1530 /* 1531 * ctmpl_set 1532 * 1533 * Sets the requested terms of a template. 1534 */ 1535 int 1536 ctmpl_set(ct_template_t *template, ct_kparam_t *kparam, const cred_t *cr) 1537 { 1538 int result = 0; 1539 ct_param_t *param = &kparam->param; 1540 uint64_t param_value; 1541 1542 if (param->ctpm_id == CTP_COOKIE || 1543 param->ctpm_id == CTP_EV_INFO || 1544 param->ctpm_id == CTP_EV_CRITICAL) { 1545 if (param->ctpm_size < sizeof (uint64_t)) { 1546 return (EINVAL); 1547 } else { 1548 param_value = *(uint64_t *)kparam->ctpm_kbuf; 1549 } 1550 } 1551 1552 mutex_enter(&template->ctmpl_lock); 1553 switch (param->ctpm_id) { 1554 case CTP_COOKIE: 1555 template->ctmpl_cookie = param_value; 1556 break; 1557 case CTP_EV_INFO: 1558 if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) 1559 result = EINVAL; 1560 else 1561 template->ctmpl_ev_info = param_value; 1562 break; 1563 case CTP_EV_CRITICAL: 1564 if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) { 1565 result = EINVAL; 1566 break; 1567 } else if ((~template->ctmpl_ev_crit & param_value) == 0) { 1568 /* 1569 * Assume that a pure reduction of the critical 1570 * set is allowed by the contract type. 
1571 */ 1572 template->ctmpl_ev_crit = param_value; 1573 break; 1574 } 1575 /* 1576 * There may be restrictions on what we can make 1577 * critical, so we defer to the judgement of the 1578 * contract type. 1579 */ 1580 /* FALLTHROUGH */ 1581 default: 1582 result = template->ctmpl_ops->ctop_set(template, kparam, cr); 1583 } 1584 mutex_exit(&template->ctmpl_lock); 1585 1586 return (result); 1587 } 1588 1589 /* 1590 * ctmpl_get 1591 * 1592 * Obtains the requested terms from a template. 1593 * 1594 * If the term requested is a variable-sized term and the buffer 1595 * provided is too small for the data, we truncate the data and return 1596 * the buffer size necessary to fit the term in kparam->ret_size. If the 1597 * term requested is fix-sized (uint64_t) and the buffer provided is too 1598 * small, we return EINVAL. This should never happen if you're using 1599 * libcontract(3LIB), only if you call ioctl with a hand constructed 1600 * ct_param_t argument. 1601 * 1602 * Currently, only contract specific parameters have variable-sized 1603 * parameters. 
1604 */ 1605 int 1606 ctmpl_get(ct_template_t *template, ct_kparam_t *kparam) 1607 { 1608 int result = 0; 1609 ct_param_t *param = &kparam->param; 1610 uint64_t *param_value; 1611 1612 if (param->ctpm_id == CTP_COOKIE || 1613 param->ctpm_id == CTP_EV_INFO || 1614 param->ctpm_id == CTP_EV_CRITICAL) { 1615 if (param->ctpm_size < sizeof (uint64_t)) { 1616 return (EINVAL); 1617 } else { 1618 param_value = kparam->ctpm_kbuf; 1619 kparam->ret_size = sizeof (uint64_t); 1620 } 1621 } 1622 1623 mutex_enter(&template->ctmpl_lock); 1624 switch (param->ctpm_id) { 1625 case CTP_COOKIE: 1626 *param_value = template->ctmpl_cookie; 1627 break; 1628 case CTP_EV_INFO: 1629 *param_value = template->ctmpl_ev_info; 1630 break; 1631 case CTP_EV_CRITICAL: 1632 *param_value = template->ctmpl_ev_crit; 1633 break; 1634 default: 1635 result = template->ctmpl_ops->ctop_get(template, kparam); 1636 } 1637 mutex_exit(&template->ctmpl_lock); 1638 1639 return (result); 1640 } 1641 1642 /* 1643 * ctmpl_makecurrent 1644 * 1645 * Used by ctmpl_activate and ctmpl_clear to set the current thread's 1646 * active template. Frees the old active template, if there was one. 1647 */ 1648 static void 1649 ctmpl_makecurrent(ct_template_t *template, ct_template_t *new) 1650 { 1651 klwp_t *curlwp = ttolwp(curthread); 1652 proc_t *p = curproc; 1653 ct_template_t *old; 1654 1655 mutex_enter(&p->p_lock); 1656 old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index]; 1657 curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new; 1658 mutex_exit(&p->p_lock); 1659 1660 if (old) 1661 ctmpl_free(old); 1662 } 1663 1664 /* 1665 * ctmpl_activate 1666 * 1667 * Copy the specified template as the current thread's activate 1668 * template of that type. 
1669 */ 1670 void 1671 ctmpl_activate(ct_template_t *template) 1672 { 1673 ctmpl_makecurrent(template, ctmpl_dup(template)); 1674 } 1675 1676 /* 1677 * ctmpl_clear 1678 * 1679 * Clears the current thread's activate template of the same type as 1680 * the specified template. 1681 */ 1682 void 1683 ctmpl_clear(ct_template_t *template) 1684 { 1685 ctmpl_makecurrent(template, NULL); 1686 } 1687 1688 /* 1689 * ctmpl_create 1690 * 1691 * Creates a new contract using the specified template. 1692 */ 1693 int 1694 ctmpl_create(ct_template_t *template, ctid_t *ctidp) 1695 { 1696 return (template->ctmpl_ops->ctop_create(template, ctidp)); 1697 } 1698 1699 /* 1700 * ctmpl_init 1701 * 1702 * Initializes the common portion of a new contract template. 1703 */ 1704 void 1705 ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data) 1706 { 1707 mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL); 1708 new->ctmpl_ops = ops; 1709 new->ctmpl_type = type; 1710 new->ctmpl_data = data; 1711 new->ctmpl_ev_info = new->ctmpl_ev_crit = 0; 1712 new->ctmpl_cookie = 0; 1713 } 1714 1715 /* 1716 * ctmpl_copy 1717 * 1718 * Copies the common portions of a contract template. Intended for use 1719 * by a contract type's ctop_dup template op. Returns with the old 1720 * template's lock held, which will should remain held until the 1721 * template op returns (it is dropped by ctmpl_dup). 1722 */ 1723 void 1724 ctmpl_copy(ct_template_t *new, ct_template_t *old) 1725 { 1726 mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL); 1727 mutex_enter(&old->ctmpl_lock); 1728 new->ctmpl_ops = old->ctmpl_ops; 1729 new->ctmpl_type = old->ctmpl_type; 1730 new->ctmpl_ev_crit = old->ctmpl_ev_crit; 1731 new->ctmpl_ev_info = old->ctmpl_ev_info; 1732 new->ctmpl_cookie = old->ctmpl_cookie; 1733 } 1734 1735 /* 1736 * ctmpl_create_inval 1737 * 1738 * Returns EINVAL. 
Provided for the convenience of those contract 1739 * types which don't support ct_tmpl_create(3contract) and would 1740 * otherwise need to create their own stub for the ctop_create template 1741 * op. 1742 */ 1743 /*ARGSUSED*/ 1744 int 1745 ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp) 1746 { 1747 return (EINVAL); 1748 } 1749 1750 1751 /* 1752 * cte_queue_create 1753 * 1754 * Initializes a queue of a particular type. If dynamic is set, the 1755 * queue is to be freed when its last listener is removed after being 1756 * drained. 1757 */ 1758 static void 1759 cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic) 1760 { 1761 mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL); 1762 q->ctq_listno = list; 1763 list_create(&q->ctq_events, sizeof (ct_kevent_t), 1764 offsetof(ct_kevent_t, cte_nodes[list].ctm_node)); 1765 list_create(&q->ctq_listeners, sizeof (ct_listener_t), 1766 offsetof(ct_listener_t, ctl_allnode)); 1767 list_create(&q->ctq_tail, sizeof (ct_listener_t), 1768 offsetof(ct_listener_t, ctl_tailnode)); 1769 gethrestime(&q->ctq_atime); 1770 q->ctq_nlisteners = 0; 1771 q->ctq_nreliable = 0; 1772 q->ctq_ninf = 0; 1773 q->ctq_max = maxinf; 1774 1775 /* 1776 * Bundle queues and contract queues are embedded in other 1777 * structures and are implicitly referenced counted by virtue 1778 * of their vnodes' indirect hold on their contracts. Process 1779 * bundle queues are dynamically allocated and may persist 1780 * after the death of the process, so they must be explicitly 1781 * reference counted. 1782 */ 1783 q->ctq_flags = dynamic ? CTQ_REFFED : 0; 1784 } 1785 1786 /* 1787 * cte_queue_destroy 1788 * 1789 * Destroys the specified queue. The queue is freed if referenced 1790 * counted. 
1791 */ 1792 static void 1793 cte_queue_destroy(ct_equeue_t *q) 1794 { 1795 ASSERT(q->ctq_flags & CTQ_DEAD); 1796 ASSERT(q->ctq_nlisteners == 0); 1797 ASSERT(q->ctq_nreliable == 0); 1798 list_destroy(&q->ctq_events); 1799 list_destroy(&q->ctq_listeners); 1800 list_destroy(&q->ctq_tail); 1801 mutex_destroy(&q->ctq_lock); 1802 if (q->ctq_flags & CTQ_REFFED) 1803 kmem_free(q, sizeof (ct_equeue_t)); 1804 } 1805 1806 /* 1807 * cte_hold 1808 * 1809 * Takes a hold on the specified event. 1810 */ 1811 static void 1812 cte_hold(ct_kevent_t *e) 1813 { 1814 mutex_enter(&e->cte_lock); 1815 ASSERT(e->cte_refs > 0); 1816 e->cte_refs++; 1817 mutex_exit(&e->cte_lock); 1818 } 1819 1820 /* 1821 * cte_rele 1822 * 1823 * Releases a hold on the specified event. If the caller had the last 1824 * reference, frees the event and releases its hold on the contract 1825 * that generated it. 1826 */ 1827 static void 1828 cte_rele(ct_kevent_t *e) 1829 { 1830 mutex_enter(&e->cte_lock); 1831 ASSERT(e->cte_refs > 0); 1832 if (--e->cte_refs) { 1833 mutex_exit(&e->cte_lock); 1834 return; 1835 } 1836 1837 contract_rele(e->cte_contract); 1838 1839 mutex_destroy(&e->cte_lock); 1840 nvlist_free(e->cte_data); 1841 nvlist_free(e->cte_gdata); 1842 kmem_free(e, sizeof (ct_kevent_t)); 1843 } 1844 1845 /* 1846 * cte_qrele 1847 * 1848 * Remove this listener's hold on the specified event, removing and 1849 * releasing the queue's hold on the event if appropriate. 1850 */ 1851 static void 1852 cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e) 1853 { 1854 ct_member_t *member = &e->cte_nodes[q->ctq_listno]; 1855 1856 ASSERT(MUTEX_HELD(&q->ctq_lock)); 1857 1858 if (l->ctl_flags & CTLF_RELIABLE) 1859 member->ctm_nreliable--; 1860 if ((--member->ctm_refs == 0) && member->ctm_trimmed) { 1861 member->ctm_trimmed = 0; 1862 list_remove(&q->ctq_events, e); 1863 cte_rele(e); 1864 } 1865 } 1866 1867 /* 1868 * cte_qmove 1869 * 1870 * Move this listener to the specified event in the queue. 
1871 */ 1872 static ct_kevent_t * 1873 cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e) 1874 { 1875 ct_kevent_t *olde; 1876 1877 ASSERT(MUTEX_HELD(&q->ctq_lock)); 1878 ASSERT(l->ctl_equeue == q); 1879 1880 if ((olde = l->ctl_position) == NULL) 1881 list_remove(&q->ctq_tail, l); 1882 1883 while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed) 1884 e = list_next(&q->ctq_events, e); 1885 1886 if (e != NULL) { 1887 e->cte_nodes[q->ctq_listno].ctm_refs++; 1888 if (l->ctl_flags & CTLF_RELIABLE) 1889 e->cte_nodes[q->ctq_listno].ctm_nreliable++; 1890 } else { 1891 list_insert_tail(&q->ctq_tail, l); 1892 } 1893 1894 l->ctl_position = e; 1895 if (olde) 1896 cte_qrele(q, l, olde); 1897 1898 return (e); 1899 } 1900 1901 /* 1902 * cte_checkcred 1903 * 1904 * Determines if the specified event's contract is owned by a process 1905 * with the same effective uid as the specified credential. Called 1906 * after a failed call to contract_owned with locked set. Because it 1907 * drops the queue lock, its caller (cte_qreadable) needs to make sure 1908 * we're still in the same place after we return. Returns 1 on 1909 * success. 1910 */ 1911 static int 1912 cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr) 1913 { 1914 int result; 1915 contract_t *ct = e->cte_contract; 1916 1917 cte_hold(e); 1918 mutex_exit(&q->ctq_lock); 1919 result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid && 1920 contract_checkcred(ct, cr); 1921 mutex_enter(&q->ctq_lock); 1922 cte_rele(e); 1923 1924 return (result); 1925 } 1926 1927 /* 1928 * cte_qreadable 1929 * 1930 * Ensures that the listener is pointing to a valid event that the 1931 * caller has the credentials to read. Returns 0 if we can read the 1932 * event we're pointing to. 
1933 */ 1934 static int 1935 cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr, 1936 uint64_t zuniqid, int crit) 1937 { 1938 ct_kevent_t *e, *next; 1939 contract_t *ct; 1940 1941 ASSERT(MUTEX_HELD(&q->ctq_lock)); 1942 ASSERT(l->ctl_equeue == q); 1943 1944 if (l->ctl_flags & CTLF_COPYOUT) 1945 return (1); 1946 1947 next = l->ctl_position; 1948 while (e = cte_qmove(q, l, next)) { 1949 ct = e->cte_contract; 1950 /* 1951 * Check obvious things first. If we are looking for a 1952 * critical message, is this one? If we aren't in the 1953 * global zone, is this message meant for us? 1954 */ 1955 if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) || 1956 (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID && 1957 zuniqid != contract_getzuniqid(ct))) { 1958 1959 next = list_next(&q->ctq_events, e); 1960 1961 /* 1962 * Next, see if our effective uid equals that of owner 1963 * or author of the contract. Since we are holding the 1964 * queue lock, contract_owned can't always check if we 1965 * have the same effective uid as the contract's 1966 * owner. If it comes to that, it fails and we take 1967 * the slow(er) path. 1968 */ 1969 } else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) { 1970 1971 /* 1972 * At this point we either don't have any claim 1973 * to this contract or we match the effective 1974 * uid of the owner but couldn't tell. We 1975 * first test for a NULL holder so that events 1976 * from orphans and inherited contracts avoid 1977 * the penalty phase. 1978 */ 1979 if (e->cte_contract->ct_owner == NULL && 1980 !secpolicy_contract_observer_choice(cr)) 1981 next = list_next(&q->ctq_events, e); 1982 1983 /* 1984 * cte_checkcred will juggle locks to see if we 1985 * have the same uid as the event's contract's 1986 * current owner. If it succeeds, we have to 1987 * make sure we are in the same point in the 1988 * queue. 
1989 */ 1990 else if (cte_checkcred(q, e, cr) && 1991 l->ctl_position == e) 1992 break; 1993 1994 /* 1995 * cte_checkcred failed; see if we're in the 1996 * same place. 1997 */ 1998 else if (l->ctl_position == e) 1999 if (secpolicy_contract_observer_choice(cr)) 2000 break; 2001 else 2002 next = list_next(&q->ctq_events, e); 2003 2004 /* 2005 * cte_checkcred failed, and our position was 2006 * changed. Start from there. 2007 */ 2008 else 2009 next = l->ctl_position; 2010 } else { 2011 break; 2012 } 2013 } 2014 2015 /* 2016 * We check for CTLF_COPYOUT again in case we dropped the queue 2017 * lock in cte_checkcred. 2018 */ 2019 return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL)); 2020 } 2021 2022 /* 2023 * cte_qwakeup 2024 * 2025 * Wakes up any waiting listeners and points them at the specified event. 2026 */ 2027 static void 2028 cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e) 2029 { 2030 ct_listener_t *l; 2031 2032 ASSERT(MUTEX_HELD(&q->ctq_lock)); 2033 2034 while (l = list_head(&q->ctq_tail)) { 2035 list_remove(&q->ctq_tail, l); 2036 e->cte_nodes[q->ctq_listno].ctm_refs++; 2037 if (l->ctl_flags & CTLF_RELIABLE) 2038 e->cte_nodes[q->ctq_listno].ctm_nreliable++; 2039 l->ctl_position = e; 2040 cv_signal(&l->ctl_cv); 2041 pollwakeup(&l->ctl_pollhead, POLLIN); 2042 } 2043 } 2044 2045 /* 2046 * cte_copy 2047 * 2048 * Copies events from the specified contract event queue to the 2049 * end of the specified process bundle queue. Only called from 2050 * contract_adopt. 2051 * 2052 * We copy to the end of the target queue instead of mixing the events 2053 * in their proper order because otherwise the act of adopting a 2054 * contract would require a process to reset all process bundle 2055 * listeners it needed to see the new events. This would, in turn, 2056 * require the process to keep track of which preexisting events had 2057 * already been processed. 
2058 */ 2059 static void 2060 cte_copy(ct_equeue_t *q, ct_equeue_t *newq) 2061 { 2062 ct_kevent_t *e, *first = NULL; 2063 2064 VERIFY(q->ctq_listno == CTEL_CONTRACT); 2065 VERIFY(newq->ctq_listno == CTEL_PBUNDLE); 2066 2067 mutex_enter(&q->ctq_lock); 2068 mutex_enter(&newq->ctq_lock); 2069 2070 /* 2071 * For now, only copy critical events. 2072 */ 2073 for (e = list_head(&q->ctq_events); e != NULL; 2074 e = list_next(&q->ctq_events, e)) { 2075 if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) { 2076 if (first == NULL) 2077 first = e; 2078 /* 2079 * It is possible for adoption to race with an owner's 2080 * cte_publish_all(); we must only enqueue events that 2081 * have not already been enqueued. 2082 */ 2083 if (!list_link_active((list_node_t *) 2084 ((uintptr_t)e + newq->ctq_events.list_offset))) { 2085 list_insert_tail(&newq->ctq_events, e); 2086 cte_hold(e); 2087 } 2088 } 2089 } 2090 2091 mutex_exit(&q->ctq_lock); 2092 2093 if (first) 2094 cte_qwakeup(newq, first); 2095 2096 mutex_exit(&newq->ctq_lock); 2097 } 2098 2099 /* 2100 * cte_trim 2101 * 2102 * Trims unneeded events from an event queue. Algorithm works as 2103 * follows: 2104 * 2105 * Removes all informative and acknowledged critical events until the 2106 * first referenced event is found. 2107 * 2108 * If a contract is specified, removes all events (regardless of 2109 * acknowledgement) generated by that contract until the first event 2110 * referenced by a reliable listener is found. Reference events are 2111 * removed by marking them "trimmed". Such events will be removed 2112 * when the last reference is dropped and will be skipped by future 2113 * listeners. 2114 * 2115 * This is pretty basic. Ideally this should remove from the middle of 2116 * the list (i.e. beyond the first referenced event), and even 2117 * referenced events. 
2118 */ 2119 static void 2120 cte_trim(ct_equeue_t *q, contract_t *ct) 2121 { 2122 ct_kevent_t *e, *next; 2123 int flags, stopper; 2124 int start = 1; 2125 2126 VERIFY(MUTEX_HELD(&q->ctq_lock)); 2127 2128 for (e = list_head(&q->ctq_events); e != NULL; e = next) { 2129 next = list_next(&q->ctq_events, e); 2130 flags = e->cte_flags; 2131 stopper = (q->ctq_listno != CTEL_PBUNDLE) && 2132 (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0); 2133 if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) { 2134 if ((start && (flags & (CTE_INFO | CTE_ACK))) || 2135 (e->cte_contract == ct)) { 2136 /* 2137 * Toss informative and ACKed critical messages. 2138 */ 2139 list_remove(&q->ctq_events, e); 2140 cte_rele(e); 2141 } 2142 } else if ((e->cte_contract == ct) && !stopper) { 2143 ASSERT(q->ctq_nlisteners != 0); 2144 e->cte_nodes[q->ctq_listno].ctm_trimmed = 1; 2145 } else if (ct && !stopper) { 2146 start = 0; 2147 } else { 2148 /* 2149 * Don't free messages past the first reader. 2150 */ 2151 break; 2152 } 2153 } 2154 } 2155 2156 /* 2157 * cte_queue_drain 2158 * 2159 * Drain all events from the specified queue, and mark it dead. If 2160 * "ack" is set, acknowledge any critical events we find along the 2161 * way. 2162 */ 2163 static void 2164 cte_queue_drain(ct_equeue_t *q, int ack) 2165 { 2166 ct_kevent_t *e, *next; 2167 ct_listener_t *l; 2168 2169 mutex_enter(&q->ctq_lock); 2170 2171 for (e = list_head(&q->ctq_events); e != NULL; e = next) { 2172 next = list_next(&q->ctq_events, e); 2173 if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) { 2174 /* 2175 * Make sure critical messages are eventually 2176 * removed from the bundle queues. 
2177 */ 2178 mutex_enter(&e->cte_lock); 2179 e->cte_flags |= CTE_ACK; 2180 mutex_exit(&e->cte_lock); 2181 ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock)); 2182 e->cte_contract->ct_evcnt--; 2183 } 2184 list_remove(&q->ctq_events, e); 2185 e->cte_nodes[q->ctq_listno].ctm_refs = 0; 2186 e->cte_nodes[q->ctq_listno].ctm_nreliable = 0; 2187 e->cte_nodes[q->ctq_listno].ctm_trimmed = 0; 2188 cte_rele(e); 2189 } 2190 2191 /* 2192 * This is necessary only because of CTEL_PBUNDLE listeners; 2193 * the events they point to can move from one pbundle to 2194 * another. Fortunately, this only happens if the contract is 2195 * inherited, which (in turn) only happens if the process 2196 * exits, which means it's an all-or-nothing deal. If this 2197 * wasn't the case, we would instead need to keep track of 2198 * listeners on a per-event basis, not just a per-queue basis. 2199 * This would have the side benefit of letting us clean up 2200 * trimmed events sooner (i.e. immediately), but would 2201 * unfortunately make events even bigger than they already 2202 * are. 2203 */ 2204 for (l = list_head(&q->ctq_listeners); l; 2205 l = list_next(&q->ctq_listeners, l)) { 2206 l->ctl_flags |= CTLF_DEAD; 2207 if (l->ctl_position) { 2208 l->ctl_position = NULL; 2209 list_insert_tail(&q->ctq_tail, l); 2210 } 2211 cv_broadcast(&l->ctl_cv); 2212 } 2213 2214 /* 2215 * Disallow events. 2216 */ 2217 q->ctq_flags |= CTQ_DEAD; 2218 2219 /* 2220 * If we represent the last reference to a reference counted 2221 * process bundle queue, free it. 2222 */ 2223 if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0)) 2224 cte_queue_destroy(q); 2225 else 2226 mutex_exit(&q->ctq_lock); 2227 } 2228 2229 /* 2230 * cte_publish 2231 * 2232 * Publishes an event to a specific queue. Only called by 2233 * cte_publish_all. 
2234 */ 2235 static void 2236 cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist) 2237 { 2238 ASSERT(MUTEX_HELD(&q->ctq_lock)); 2239 2240 q->ctq_atime = *tsp; 2241 2242 /* 2243 * If this event may already exist on this queue, check to see if it 2244 * is already there and return if so. 2245 */ 2246 if (mayexist && list_link_active((list_node_t *)((uintptr_t)e + 2247 q->ctq_events.list_offset))) { 2248 mutex_exit(&q->ctq_lock); 2249 cte_rele(e); 2250 return; 2251 } 2252 2253 /* 2254 * Don't publish if the event is informative and there aren't 2255 * any listeners, or if the queue has been shut down. 2256 */ 2257 if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) || 2258 (q->ctq_flags & CTQ_DEAD)) { 2259 mutex_exit(&q->ctq_lock); 2260 cte_rele(e); 2261 return; 2262 } 2263 2264 /* 2265 * Enqueue event 2266 */ 2267 VERIFY(!list_link_active((list_node_t *) 2268 ((uintptr_t)e + q->ctq_events.list_offset))); 2269 list_insert_tail(&q->ctq_events, e); 2270 2271 /* 2272 * Check for waiting listeners 2273 */ 2274 cte_qwakeup(q, e); 2275 2276 /* 2277 * Trim unnecessary events from the queue. 2278 */ 2279 cte_trim(q, NULL); 2280 mutex_exit(&q->ctq_lock); 2281 } 2282 2283 /* 2284 * cte_publish_all 2285 * 2286 * Publish an event to all necessary event queues. The event, e, must 2287 * be zallocated by the caller, and the event's flags and type must be 2288 * set. The rest of the event's fields are initialized here. 
2289 */ 2290 uint64_t 2291 cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata) 2292 { 2293 ct_equeue_t *q; 2294 timespec_t ts; 2295 uint64_t evid; 2296 ct_kevent_t *negev; 2297 int negend; 2298 2299 e->cte_contract = ct; 2300 e->cte_data = data; 2301 e->cte_gdata = gdata; 2302 e->cte_refs = 3; 2303 evid = e->cte_id = atomic_inc_64_nv(&ct->ct_type->ct_type_evid); 2304 contract_hold(ct); 2305 2306 /* 2307 * For a negotiation event we set the ct->ct_nevent field of the 2308 * contract for the duration of the negotiation 2309 */ 2310 negend = 0; 2311 if (e->cte_flags & CTE_NEG) { 2312 cte_hold(e); 2313 ct->ct_nevent = e; 2314 } else if (e->cte_type == CT_EV_NEGEND) { 2315 negend = 1; 2316 } 2317 2318 gethrestime(&ts); 2319 2320 /* 2321 * ct_evtlock simply (and only) ensures that two events sent 2322 * from the same contract are delivered to all queues in the 2323 * same order. 2324 */ 2325 mutex_enter(&ct->ct_evtlock); 2326 2327 /* 2328 * CTEL_CONTRACT - First deliver to the contract queue, acking 2329 * the event if the contract has been orphaned. 2330 */ 2331 mutex_enter(&ct->ct_lock); 2332 mutex_enter(&ct->ct_events.ctq_lock); 2333 if ((e->cte_flags & CTE_INFO) == 0) { 2334 if (ct->ct_state >= CTS_ORPHAN) 2335 e->cte_flags |= CTE_ACK; 2336 else 2337 ct->ct_evcnt++; 2338 } 2339 mutex_exit(&ct->ct_lock); 2340 cte_publish(&ct->ct_events, e, &ts, B_FALSE); 2341 2342 /* 2343 * CTEL_BUNDLE - Next deliver to the contract type's bundle 2344 * queue. 2345 */ 2346 mutex_enter(&ct->ct_type->ct_type_events.ctq_lock); 2347 cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE); 2348 2349 /* 2350 * CTEL_PBUNDLE - Finally, if the contract has an owner, 2351 * deliver to the owner's process bundle queue. 2352 */ 2353 mutex_enter(&ct->ct_lock); 2354 if (ct->ct_owner) { 2355 /* 2356 * proc_exit doesn't free event queues until it has 2357 * abandoned all contracts. 
2358 */ 2359 ASSERT(ct->ct_owner->p_ct_equeue); 2360 ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]); 2361 q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]; 2362 mutex_enter(&q->ctq_lock); 2363 mutex_exit(&ct->ct_lock); 2364 2365 /* 2366 * It is possible for this code to race with adoption; we 2367 * publish the event indicating that the event may already 2368 * be enqueued because adoption beat us to it (in which case 2369 * cte_pubish() does nothing). 2370 */ 2371 cte_publish(q, e, &ts, B_TRUE); 2372 } else { 2373 mutex_exit(&ct->ct_lock); 2374 cte_rele(e); 2375 } 2376 2377 if (negend) { 2378 mutex_enter(&ct->ct_lock); 2379 negev = ct->ct_nevent; 2380 ct->ct_nevent = NULL; 2381 cte_rele(negev); 2382 mutex_exit(&ct->ct_lock); 2383 } 2384 2385 mutex_exit(&ct->ct_evtlock); 2386 2387 return (evid); 2388 } 2389 2390 /* 2391 * cte_add_listener 2392 * 2393 * Add a new listener to an event queue. 2394 */ 2395 void 2396 cte_add_listener(ct_equeue_t *q, ct_listener_t *l) 2397 { 2398 cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL); 2399 l->ctl_equeue = q; 2400 l->ctl_position = NULL; 2401 l->ctl_flags = 0; 2402 2403 mutex_enter(&q->ctq_lock); 2404 list_insert_head(&q->ctq_tail, l); 2405 list_insert_head(&q->ctq_listeners, l); 2406 q->ctq_nlisteners++; 2407 mutex_exit(&q->ctq_lock); 2408 } 2409 2410 /* 2411 * cte_remove_listener 2412 * 2413 * Remove a listener from an event queue. No other queue activities 2414 * (e.g. cte_get event) may be in progress at this endpoint when this 2415 * is called. 
2416 */ 2417 void 2418 cte_remove_listener(ct_listener_t *l) 2419 { 2420 ct_equeue_t *q = l->ctl_equeue; 2421 ct_kevent_t *e; 2422 2423 mutex_enter(&q->ctq_lock); 2424 2425 ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0); 2426 2427 if ((e = l->ctl_position) != NULL) 2428 cte_qrele(q, l, e); 2429 else 2430 list_remove(&q->ctq_tail, l); 2431 l->ctl_position = NULL; 2432 2433 q->ctq_nlisteners--; 2434 list_remove(&q->ctq_listeners, l); 2435 2436 if (l->ctl_flags & CTLF_RELIABLE) 2437 q->ctq_nreliable--; 2438 2439 /* 2440 * If we are a the last listener of a dead reference counted 2441 * queue (i.e. a process bundle) we free it. Otherwise we just 2442 * trim any events which may have been kept around for our 2443 * benefit. 2444 */ 2445 if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) && 2446 (q->ctq_nlisteners == 0)) { 2447 cte_queue_destroy(q); 2448 } else { 2449 cte_trim(q, NULL); 2450 mutex_exit(&q->ctq_lock); 2451 } 2452 } 2453 2454 /* 2455 * cte_reset_listener 2456 * 2457 * Moves a listener's queue pointer to the beginning of the queue. 2458 */ 2459 void 2460 cte_reset_listener(ct_listener_t *l) 2461 { 2462 ct_equeue_t *q = l->ctl_equeue; 2463 2464 mutex_enter(&q->ctq_lock); 2465 2466 /* 2467 * We allow an asynchronous reset because it doesn't make a 2468 * whole lot of sense to make reset block or fail. We already 2469 * have most of the mechanism needed thanks to queue trimming, 2470 * so implementing it isn't a big deal. 2471 */ 2472 if (l->ctl_flags & CTLF_COPYOUT) 2473 l->ctl_flags |= CTLF_RESET; 2474 2475 (void) cte_qmove(q, l, list_head(&q->ctq_events)); 2476 2477 /* 2478 * Inform blocked readers. 2479 */ 2480 cv_broadcast(&l->ctl_cv); 2481 pollwakeup(&l->ctl_pollhead, POLLIN); 2482 mutex_exit(&q->ctq_lock); 2483 } 2484 2485 /* 2486 * cte_next_event 2487 * 2488 * Moves the event pointer for the specified listener to the next event 2489 * on the queue. 
To avoid races, this movement only occurs if the 2490 * specified event id matches that of the current event. This is used 2491 * primarily to skip events that have been read but whose extended data 2492 * haven't been copied out. 2493 */ 2494 int 2495 cte_next_event(ct_listener_t *l, uint64_t id) 2496 { 2497 ct_equeue_t *q = l->ctl_equeue; 2498 ct_kevent_t *old; 2499 2500 mutex_enter(&q->ctq_lock); 2501 2502 if (l->ctl_flags & CTLF_COPYOUT) 2503 l->ctl_flags |= CTLF_RESET; 2504 2505 if (((old = l->ctl_position) != NULL) && (old->cte_id == id)) 2506 (void) cte_qmove(q, l, list_next(&q->ctq_events, old)); 2507 2508 mutex_exit(&q->ctq_lock); 2509 2510 return (0); 2511 } 2512 2513 /* 2514 * cte_get_event 2515 * 2516 * Reads an event from an event endpoint. If "nonblock" is clear, we 2517 * block until a suitable event is ready. If "crit" is set, we only 2518 * read critical events. Note that while "cr" is the caller's cred, 2519 * "zuniqid" is the unique id of the zone the calling contract 2520 * filesystem was mounted in. 2521 */ 2522 int 2523 cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr, 2524 uint64_t zuniqid, int crit) 2525 { 2526 ct_equeue_t *q = l->ctl_equeue; 2527 ct_kevent_t *temp; 2528 int result = 0; 2529 int partial = 0; 2530 size_t size, gsize, len; 2531 model_t mdl = get_udatamodel(); 2532 STRUCT_DECL(ct_event, ev); 2533 STRUCT_INIT(ev, mdl); 2534 2535 /* 2536 * cte_qreadable checks for CTLF_COPYOUT as well as ensures 2537 * that there exists, and we are pointing to, an appropriate 2538 * event. It may temporarily drop ctq_lock, but that doesn't 2539 * really matter to us. 
2540 */ 2541 mutex_enter(&q->ctq_lock); 2542 while (cte_qreadable(q, l, cr, zuniqid, crit)) { 2543 if (nonblock) { 2544 result = EAGAIN; 2545 goto error; 2546 } 2547 if (q->ctq_flags & CTQ_DEAD) { 2548 result = EIDRM; 2549 goto error; 2550 } 2551 result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock); 2552 if (result == 0) { 2553 result = EINTR; 2554 goto error; 2555 } 2556 } 2557 temp = l->ctl_position; 2558 cte_hold(temp); 2559 l->ctl_flags |= CTLF_COPYOUT; 2560 mutex_exit(&q->ctq_lock); 2561 2562 /* 2563 * We now have an event. Copy in the user event structure to 2564 * see how much space we have to work with. 2565 */ 2566 result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev)); 2567 if (result) 2568 goto copyerr; 2569 2570 /* 2571 * Determine what data we have and what the user should be 2572 * allowed to see. 2573 */ 2574 size = gsize = 0; 2575 if (temp->cte_data) { 2576 VERIFY(nvlist_size(temp->cte_data, &size, 2577 NV_ENCODE_NATIVE) == 0); 2578 ASSERT(size != 0); 2579 } 2580 if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) { 2581 VERIFY(nvlist_size(temp->cte_gdata, &gsize, 2582 NV_ENCODE_NATIVE) == 0); 2583 ASSERT(gsize != 0); 2584 } 2585 2586 /* 2587 * If we have enough space, copy out the extended event data. 2588 */ 2589 len = size + gsize; 2590 if (len) { 2591 if (STRUCT_FGET(ev, ctev_nbytes) >= len) { 2592 char *buf = kmem_alloc(len, KM_SLEEP); 2593 2594 if (size) 2595 VERIFY(nvlist_pack(temp->cte_data, &buf, &size, 2596 NV_ENCODE_NATIVE, KM_SLEEP) == 0); 2597 if (gsize) { 2598 char *tmp = buf + size; 2599 2600 VERIFY(nvlist_pack(temp->cte_gdata, &tmp, 2601 &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0); 2602 } 2603 2604 /* This shouldn't have changed */ 2605 ASSERT(size + gsize == len); 2606 result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer), 2607 len); 2608 kmem_free(buf, len); 2609 if (result) 2610 goto copyerr; 2611 } else { 2612 partial = 1; 2613 } 2614 } 2615 2616 /* 2617 * Copy out the common event data. 
2618 */ 2619 STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id); 2620 STRUCT_FSET(ev, ctev_evid, temp->cte_id); 2621 STRUCT_FSET(ev, ctev_cttype, 2622 temp->cte_contract->ct_type->ct_type_index); 2623 STRUCT_FSET(ev, ctev_flags, temp->cte_flags & 2624 (CTE_ACK|CTE_INFO|CTE_NEG)); 2625 STRUCT_FSET(ev, ctev_type, temp->cte_type); 2626 STRUCT_FSET(ev, ctev_nbytes, len); 2627 STRUCT_FSET(ev, ctev_goffset, size); 2628 result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev)); 2629 2630 copyerr: 2631 /* 2632 * Only move our location in the queue if all copyouts were 2633 * successful, the caller provided enough space for the entire 2634 * event, and our endpoint wasn't reset or otherwise moved by 2635 * another thread. 2636 */ 2637 mutex_enter(&q->ctq_lock); 2638 if (result) 2639 result = EFAULT; 2640 else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) && 2641 (l->ctl_position == temp)) 2642 (void) cte_qmove(q, l, list_next(&q->ctq_events, temp)); 2643 l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET); 2644 /* 2645 * Signal any readers blocked on our CTLF_COPYOUT. 2646 */ 2647 cv_signal(&l->ctl_cv); 2648 cte_rele(temp); 2649 2650 error: 2651 mutex_exit(&q->ctq_lock); 2652 return (result); 2653 } 2654 2655 /* 2656 * cte_set_reliable 2657 * 2658 * Requests that events be reliably delivered to an event endpoint. 2659 * Unread informative and acknowledged critical events will not be 2660 * removed from the queue until this listener reads or skips them. 2661 * Because a listener could maliciously request reliable delivery and 2662 * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the 2663 * caller's effective set. 
2664 */ 2665 int 2666 cte_set_reliable(ct_listener_t *l, const cred_t *cr) 2667 { 2668 ct_equeue_t *q = l->ctl_equeue; 2669 int error; 2670 2671 if ((error = secpolicy_contract_event(cr)) != 0) 2672 return (error); 2673 2674 mutex_enter(&q->ctq_lock); 2675 if ((l->ctl_flags & CTLF_RELIABLE) == 0) { 2676 l->ctl_flags |= CTLF_RELIABLE; 2677 q->ctq_nreliable++; 2678 if (l->ctl_position != NULL) 2679 l->ctl_position->cte_nodes[q->ctq_listno]. 2680 ctm_nreliable++; 2681 } 2682 mutex_exit(&q->ctq_lock); 2683 2684 return (0); 2685 } 2686