/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Contracts
 * ---------
 *
 * Contracts are a primitive which enriches the relationships between
 * processes and system resources. The primary purpose of contracts is
 * to provide a means for the system to negotiate the departure from a
 * binding relationship (e.g. pages locked in memory or a thread bound
 * to a processor), but they can also be used as a purely asynchronous
 * error reporting mechanism, as they are with process contracts.
 *
 * More information on how one interfaces with contracts and what
 * contracts can do for you can be found in:
 *   PSARC 2003/193 Solaris Contracts
 *   PSARC 2004/460 Contracts addendum
 *
 * This file contains the core contracts framework. By itself it is
 * useless: it depends on the contracts filesystem (ctfs) to provide an
 * interface to user processes and on the individual contract types to
 * implement the process/resource relationships.
 *
 * Data structure overview
 * -----------------------
 *
 * A contract is represented by a contract_t, which itself points to an
 * encapsulating contract-type specific contract object. A contract_t
 * contains the contract's static identity (including its terms), its
 * linkage to various bookkeeping structures, the contract-specific
 * event queue, and a reference count.
 *
 * A contract template is represented by a ct_template_t, which, like a
 * contract, points to an encapsulating contract-type specific template
 * object. A ct_template_t contains the template's terms.
 *
 * An event queue is represented by a ct_equeue_t, and consists of a
 * list of events, a list of listeners, and a list of listeners who are
 * waiting for new events (affectionately referred to as "tail
 * listeners"). There are three queue types, defined by ct_listnum_t
 * (an enum). An event may be on one queue of each type
 * simultaneously; the list linkage used by a queue is determined by
 * its type.
 *
 * An event is represented by a ct_kevent_t, which contains mostly
 * static event data (e.g. id, payload). It also has an array of
 * ct_member_t structures, each of which contains a list_node_t and
 * represents the event's linkage in a specific event queue.
 *
 * Each open of an event endpoint results in the creation of a new
 * listener, represented by a ct_listener_t.
 * In addition to linkage into the aforementioned lists in the event
 * queue, a ct_listener_t contains a pointer to the ct_kevent_t it is
 * currently positioned at as well as a set of status flags and other
 * administrative data.
 *
 * Each process has a list of contracts it owns, p_ct_held; a pointer
 * to the process contract it is a member of, p_ct_process; the linkage
 * for that membership, p_ct_member; and an array of event queue
 * structures representing the process bundle queues.
 *
 * Each LWP has an array of its active templates, lwp_ct_active, and
 * an array of the most recently created contracts, lwp_ct_latest.
 *
 * A process contract has a list of member processes and a list of
 * inherited contracts.
 *
 * There is a system-wide list of all contracts, as well as per-type
 * lists of contracts.
 *
 * Lock ordering overview
 * ----------------------
 *
 * Locks at the top are taken first:
 *
 *        ct_evtlock
 *        regent ct_lock
 *        member ct_lock
 *        pidlock
 *        p_lock
 *        contract ctq_lock        contract_lock
 *        pbundle ctq_lock
 *        cte_lock
 *        ct_reflock
 *
 * contract_lock and ctq_lock/cte_lock are not currently taken at the
 * same time.
 *
 * Reference counting and locking
 * ------------------------------
 *
 * A contract has a reference count, protected by ct_reflock.
 * (ct_reflock is also used in a couple of other places where atomic
 * access to a variable is needed in an innermost context.) A process
 * maintains a hold on each contract it owns. A process contract has a
 * hold on each contract it has inherited. Each event has a hold on
 * the contract which generated it. Process contract templates have
 * holds on the contracts referred to by their transfer terms. CTFS
 * contract directory nodes have holds on contracts. Lastly, various
 * code paths may temporarily take holds on contracts to prevent them
 * from disappearing while other processing is going on. It is
 * important to note that the global contract lists do not hold
 * references on contracts; a contract is removed from these structures
 * atomically with the release of its last reference.
 *
 * At a given point in time, a contract can either be owned by a
 * process, inherited by a regent process contract, or orphaned. A
 * contract_t's owner and regent pointers, ct_owner and ct_regent, are
 * protected by its ct_lock. The linkage in the holder's (holder =
 * owner or regent) list of contracts, ct_ctlist, is protected by
 * whatever lock protects the holder's data structure. In order for
 * these two directions to remain consistent, changing the holder of a
 * contract requires that both locks be held.
 *
 * Events also have reference counts. There is one hold on an event
 * per queue it is present on, in addition to those needed for the
 * usual sundry reasons. Individual listeners are associated with
 * specific queues, and increase a queue-specific reference count
 * stored in the ct_member_t structure.
 *
 * The dynamic contents of an event (reference count and flags) are
 * protected by its cte_lock, while the contents of the embedded
 * ct_member_t structures are protected by the locks of the queues they
 * are linked into. A ct_listener_t's contents are also protected by
 * its event queue's ctq_lock.
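 *
 * As an illustrative sketch of this hold discipline (not a verbatim
 * excerpt from any caller), a consumer looking up a contract receives
 * it held and must release that hold when done:
 *
 *        contract_t *ct = contract_ptr(id, zuniqid);
 *        if (ct != NULL) {
 *                ... use ct; it cannot be freed while held ...
 *                contract_rele(ct);
 *        }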
 *
 * Resource controls
 * -----------------
 *
 * Control:     project.max-contracts (rc_project_contract)
 * Description: Maximum number of contracts allowed a project.
 *
 * When a contract is created, the project's allocation is tested and
 * (assuming success) increased. When the last reference to a
 * contract is released, the creating project's allocation is
 * decreased.
 */

#include <sys/mutex.h>
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/id_space.h>
#include <sys/avl.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/dditypes.h>
#include <sys/contract/device_impl.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/model.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/task.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

extern rctl_hndl_t rc_project_contract;

static id_space_t *contract_ids;
static avl_tree_t contract_avl;
static kmutex_t contract_lock;

int ct_ntypes = CTT_MAXTYPE;
static ct_type_t *ct_types_static[CTT_MAXTYPE];
ct_type_t **ct_types = ct_types_static;
int ct_debug;

static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
static void cte_queue_destroy(ct_equeue_t *);
static void cte_queue_drain(ct_equeue_t *, int);
static void cte_trim(ct_equeue_t *, contract_t *);
static void cte_copy(ct_equeue_t *, ct_equeue_t *);

/*
 * contract_compar
 *
 * A contract comparator which sorts on contract ID.
 */
int
contract_compar(const void *x, const void *y)
{
        const contract_t *ct1 = x;
        const contract_t *ct2 = y;

        if (ct1->ct_id < ct2->ct_id)
                return (-1);
        if (ct1->ct_id > ct2->ct_id)
                return (1);
        return (0);
}

/*
 * contract_init
 *
 * Initializes the contract subsystem, the specific contract types, and
 * process 0.
 */
void
contract_init(void)
{
        /*
         * Initialize contract subsystem.
         */
        contract_ids = id_space_create("contracts", 1, INT_MAX);
        avl_create(&contract_avl, contract_compar, sizeof (contract_t),
            offsetof(contract_t, ct_ctavl));
        mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);

        /*
         * Initialize contract types.
         */
        contract_process_init();
        contract_device_init();

        /*
         * Initialize p0/lwp0 contract state.
         */
        avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
            offsetof(contract_t, ct_ctlist));
}

/*
 * contract_dtor
 *
 * Performs basic destruction of the common portions of a contract.
 * Called from the failure path of contract_ctor and from
 * contract_rele.
 */
static void
contract_dtor(contract_t *ct)
{
        cte_queue_destroy(&ct->ct_events);
        list_destroy(&ct->ct_vnodes);
        mutex_destroy(&ct->ct_reflock);
        mutex_destroy(&ct->ct_lock);
        mutex_destroy(&ct->ct_evtlock);
}

/*
 * contract_ctor
 *
 * Called by a contract type to initialize a contract. Fails if the
 * max-contract resource control would have been exceeded.
 * After a successful call to contract_ctor, the contract is unlocked
 * and visible in all namespaces; any type-specific initialization
 * should be completed before calling contract_ctor. Returns 0 on
 * success.
 *
 * Because not all callers can tolerate failure, a 0 value for canfail
 * instructs contract_ctor to ignore the project.max-contracts resource
 * control. Obviously, this "out" should only be employed by callers
 * who are sufficiently constrained in other ways (e.g. newproc).
 */
int
contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
    ctflags_t flags, proc_t *author, int canfail)
{
        avl_index_t where;
        klwp_t *curlwp = ttolwp(curthread);

        ASSERT(author == curproc);

        mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
        ct->ct_id = id_alloc(contract_ids);

        cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
        list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
            offsetof(contract_vnode_t, ctv_node));

        /*
         * Instance data
         */
        ct->ct_ref = 2;        /* one for the holder, one for "latest" */
        ct->ct_cuid = crgetuid(CRED());
        ct->ct_type = type;
        ct->ct_data = data;
        gethrestime(&ct->ct_ctime);
        ct->ct_state = CTS_OWNED;
        ct->ct_flags = flags;
        ct->ct_regent = author->p_ct_process ?
            &author->p_ct_process->conp_contract : NULL;
        ct->ct_ev_info = tmpl->ctmpl_ev_info;
        ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
        ct->ct_cookie = tmpl->ctmpl_cookie;
        ct->ct_owner = author;
        ct->ct_ntime.ctm_total = -1;
        ct->ct_qtime.ctm_total = -1;
        ct->ct_nevent = NULL;

        /*
         * Test project.max-contracts.
         */
        mutex_enter(&author->p_lock);
        mutex_enter(&contract_lock);
        if (canfail && rctl_test(rc_project_contract,
            author->p_task->tk_proj->kpj_rctls, author, 1,
            RCA_SAFE) & RCT_DENY) {
                id_free(contract_ids, ct->ct_id);
                mutex_exit(&contract_lock);
                mutex_exit(&author->p_lock);
                ct->ct_events.ctq_flags |= CTQ_DEAD;
                contract_dtor(ct);
                return (1);
        }
        ct->ct_proj = author->p_task->tk_proj;
        ct->ct_proj->kpj_data.kpd_contract++;
        (void) project_hold(ct->ct_proj);
        mutex_exit(&contract_lock);

        /*
         * Insert into holder's avl of contracts.
         * We use an avl not because order is important, but because
         * readdir of /proc/contracts requires we be able to use a
         * scalar as an index into the process's list of contracts.
         */
        ct->ct_zoneid = author->p_zone->zone_id;
        ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
        VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
        avl_insert(&author->p_ct_held, ct, where);
        mutex_exit(&author->p_lock);

        /*
         * Insert into global contract AVL
         */
        mutex_enter(&contract_lock);
        VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
        avl_insert(&contract_avl, ct, where);
        mutex_exit(&contract_lock);

        /*
         * Insert into type AVL
         */
        mutex_enter(&type->ct_type_lock);
        VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
        avl_insert(&type->ct_type_avl, ct, where);
        type->ct_type_timestruc = ct->ct_ctime;
        mutex_exit(&type->ct_type_lock);

        if (curlwp->lwp_ct_latest[type->ct_type_index])
                contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
        curlwp->lwp_ct_latest[type->ct_type_index] = ct;

        return (0);
}

/*
 * contract_rele
 *
 * Releases a reference to a contract. If the caller had the last
 * reference, the contract is removed from all namespaces, its
 * allocation against the max-contracts resource control is released,
 * and the contract type's free entry point is invoked for any
 * type-specific deconstruction and to (presumably) free the object.
 */
void
contract_rele(contract_t *ct)
{
        uint64_t nref;

        mutex_enter(&ct->ct_reflock);
        ASSERT(ct->ct_ref > 0);
        nref = --ct->ct_ref;
        mutex_exit(&ct->ct_reflock);
        if (nref == 0) {
                /*
                 * ct_owner is cleared when it drops its reference.
                 */
                ASSERT(ct->ct_owner == NULL);
                ASSERT(ct->ct_evcnt == 0);

                /*
                 * Remove from global contract AVL
                 */
                mutex_enter(&contract_lock);
                avl_remove(&contract_avl, ct);
                mutex_exit(&contract_lock);

                /*
                 * Remove from type AVL
                 */
                mutex_enter(&ct->ct_type->ct_type_lock);
                avl_remove(&ct->ct_type->ct_type_avl, ct);
                mutex_exit(&ct->ct_type->ct_type_lock);

                /*
                 * Release the contract's ID
                 */
                id_free(contract_ids, ct->ct_id);

                /*
                 * Release project hold
                 */
                mutex_enter(&contract_lock);
                ct->ct_proj->kpj_data.kpd_contract--;
                project_rele(ct->ct_proj);
                mutex_exit(&contract_lock);

                /*
                 * Free the contract
                 */
                contract_dtor(ct);
                ct->ct_type->ct_type_ops->contop_free(ct);
        }
}

/*
 * contract_hold
 *
 * Adds a reference to a contract.
 */
void
contract_hold(contract_t *ct)
{
        mutex_enter(&ct->ct_reflock);
        ASSERT(ct->ct_ref < UINT64_MAX);
        ct->ct_ref++;
        mutex_exit(&ct->ct_reflock);
}

/*
 * contract_getzuniqid
 *
 * Get a contract's zone unique ID. Needed because 64-bit reads and
 * writes aren't atomic on x86. Since there are contexts where we are
 * unable to take ct_lock, we instead use ct_reflock; in actuality any
 * lock would do.
 */
uint64_t
contract_getzuniqid(contract_t *ct)
{
        uint64_t zuniqid;

        mutex_enter(&ct->ct_reflock);
        zuniqid = ct->ct_mzuniqid;
        mutex_exit(&ct->ct_reflock);

        return (zuniqid);
}

/*
 * contract_setzuniqid
 *
 * Sets a contract's zone unique ID. See contract_getzuniqid.
 */
void
contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
{
        mutex_enter(&ct->ct_reflock);
        ct->ct_mzuniqid = zuniqid;
        mutex_exit(&ct->ct_reflock);
}

/*
 * contract_abandon
 *
 * Abandons the specified contract. If "explicit" is clear, the
 * contract was implicitly abandoned (by process exit) and should be
 * inherited if its terms allow it and its owner was a member of a
 * regent contract. Otherwise, the contract type's abandon entry point
 * is invoked to either destroy or orphan the contract.
 */
int
contract_abandon(contract_t *ct, proc_t *p, int explicit)
{
        ct_equeue_t *q = NULL;
        contract_t *parent = &p->p_ct_process->conp_contract;
        int inherit = 0;

        ASSERT(p == curproc);

        mutex_enter(&ct->ct_lock);

        /*
         * Multiple contract locks are taken contract -> subcontract.
         * Check if the contract will be inherited so we can acquire
         * all the necessary locks before making sensitive changes.
         */
        if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
            contract_process_accept(parent)) {
                mutex_exit(&ct->ct_lock);
                mutex_enter(&parent->ct_lock);
                mutex_enter(&ct->ct_lock);
                inherit = 1;
        }

        if (ct->ct_owner != p) {
                mutex_exit(&ct->ct_lock);
                if (inherit)
                        mutex_exit(&parent->ct_lock);
                return (EINVAL);
        }

        mutex_enter(&p->p_lock);
        if (explicit)
                avl_remove(&p->p_ct_held, ct);
        ct->ct_owner = NULL;
        mutex_exit(&p->p_lock);

        /*
         * Since we can't call cte_trim with the contract lock held,
         * we grab the queue pointer here.
         */
        if (p->p_ct_equeue)
                q = p->p_ct_equeue[ct->ct_type->ct_type_index];

        /*
         * contop_abandon may destroy the contract so we rely on it to
         * drop ct_lock. We retain a reference on the contract so that
         * the cte_trim which follows functions properly. Even though
         * cte_trim doesn't dereference the contract pointer, it is
         * still necessary to retain a reference to the contract so
         * that we don't trim events which are sent by a subsequently
         * allocated contract infortuitously located at the same
         * address.
         */
        contract_hold(ct);

        if (inherit) {
                ct->ct_state = CTS_INHERITED;
                ASSERT(ct->ct_regent == parent);
                contract_process_take(parent, ct);

                /*
                 * We are handing off the process's reference to the
                 * parent contract. For this reason, the order in
                 * which we drop the contract locks is also important.
                 */
                mutex_exit(&ct->ct_lock);
                mutex_exit(&parent->ct_lock);
        } else {
                ct->ct_regent = NULL;
                ct->ct_type->ct_type_ops->contop_abandon(ct);
        }

        /*
         * ct_lock has been dropped; we can safely trim the event
         * queue now.
         */
        if (q) {
                mutex_enter(&q->ctq_lock);
                cte_trim(q, ct);
                mutex_exit(&q->ctq_lock);
        }

        contract_rele(ct);

        return (0);
}

int
contract_newct(contract_t *ct)
{
        return (ct->ct_type->ct_type_ops->contop_newct(ct));
}

/*
 * contract_adopt
 *
 * Adopts a contract. After a successful call to this routine, the
 * previously inherited contract will belong to the calling process,
 * and its events will have been appended to its new owner's process
 * bundle queue.
 */
int
contract_adopt(contract_t *ct, proc_t *p)
{
        avl_index_t where;
        ct_equeue_t *q;
        contract_t *parent;

        ASSERT(p == curproc);

        /*
         * Ensure the process has an event queue.
         * Checked by ASSERTs below.
         */
        (void) contract_type_pbundle(ct->ct_type, p);

        mutex_enter(&ct->ct_lock);
        parent = ct->ct_regent;
        if (ct->ct_state != CTS_INHERITED ||
            &p->p_ct_process->conp_contract != parent ||
            p->p_zone->zone_uniqid != ct->ct_czuniqid) {
                mutex_exit(&ct->ct_lock);
                return (EINVAL);
        }

        /*
         * Multiple contract locks are taken contract -> subcontract.
         */
        mutex_exit(&ct->ct_lock);
        mutex_enter(&parent->ct_lock);
        mutex_enter(&ct->ct_lock);

        /*
         * It is possible that the contract was adopted by someone else
         * while its lock was dropped. It isn't possible for the
         * contract to have been inherited by a different regent
         * contract.
         */
        if (ct->ct_state != CTS_INHERITED) {
                mutex_exit(&parent->ct_lock);
                mutex_exit(&ct->ct_lock);
                return (EBUSY);
        }
        ASSERT(ct->ct_regent == parent);

        ct->ct_state = CTS_OWNED;

        contract_process_adopt(ct, p);

        mutex_enter(&p->p_lock);
        ct->ct_owner = p;
        VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
        avl_insert(&p->p_ct_held, ct, where);
        mutex_exit(&p->p_lock);

        ASSERT(ct->ct_owner->p_ct_equeue);
        ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
        q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
        cte_copy(&ct->ct_events, q);
        mutex_exit(&ct->ct_lock);

        return (0);
}

/*
 * contract_ack
 *
 * Acknowledges receipt of a critical event.
 */
int
contract_ack(contract_t *ct, uint64_t evid, int ack)
{
        ct_kevent_t *ev;
        list_t *queue = &ct->ct_events.ctq_events;
        int error = ESRCH;
        int nego = 0;
        uint_t evtype;

        ASSERT(ack == CT_ACK || ack == CT_NACK);

        mutex_enter(&ct->ct_lock);
        mutex_enter(&ct->ct_events.ctq_lock);
        /*
         * We are probably ACKing something near the head of the queue.
         */
        for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
                if (ev->cte_id == evid) {
                        if (ev->cte_flags & CTE_NEG)
                                nego = 1;
                        else if (ack == CT_NACK)
                                break;
                        if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
                                ev->cte_flags |= CTE_ACK;
                                ct->ct_evcnt--;
                                evtype = ev->cte_type;
                                error = 0;
                        }
                        break;
                }
        }
        mutex_exit(&ct->ct_events.ctq_lock);
        mutex_exit(&ct->ct_lock);

        /*
         * Not all critical events are negotiation events, however
         * every negotiation event is a critical event.
         * NEGEND events are critical events but are not negotiation
         * events.
         */
        if (error || !nego)
                return (error);

        if (ack == CT_ACK)
                error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
        else
                error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);

        return (error);
}

/*ARGSUSED*/
int
contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
{
        cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
            ct->ct_id);
        return (ENOSYS);
}

/*ARGSUSED*/
int
contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
{
        cmn_err(CE_PANIC, "contract_qack_inval: unsupported call: ctid: %u",
            ct->ct_id);
        return (ENOSYS);
}

/*ARGSUSED*/
int
contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
{
        return (ERANGE);
}

/*
 * contract_qack
 *
 * Asks that negotiations be extended by another time quantum.
 */
int
contract_qack(contract_t *ct, uint64_t evid)
{
        ct_kevent_t *ev;
        list_t *queue = &ct->ct_events.ctq_events;
        int nego = 0;
        uint_t evtype;

        mutex_enter(&ct->ct_lock);
        mutex_enter(&ct->ct_events.ctq_lock);

        for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
                if (ev->cte_id == evid) {
                        if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
                                evtype = ev->cte_type;
                                nego = 1;
                        }
                        break;
                }
        }
        mutex_exit(&ct->ct_events.ctq_lock);
        mutex_exit(&ct->ct_lock);

        /*
         * Only a negotiated event (which is by definition also a
         * critical event) which has not yet been acknowledged can
         * provide time quanta to a negotiating owner process.
         */
        if (!nego)
                return (ESRCH);

        return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
}

/*
 * contract_orphan
 *
 * Icky-poo. This is a process-contract special, used to ACK all
 * critical messages when a contract is orphaned.
 */
void
contract_orphan(contract_t *ct)
{
        ct_kevent_t *ev;
        list_t *queue = &ct->ct_events.ctq_events;

        ASSERT(MUTEX_HELD(&ct->ct_lock));
        ASSERT(ct->ct_state != CTS_ORPHAN);

        mutex_enter(&ct->ct_events.ctq_lock);
        ct->ct_state = CTS_ORPHAN;
        for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
                if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
                        ev->cte_flags |= CTE_ACK;
                        ct->ct_evcnt--;
                }
        }
        mutex_exit(&ct->ct_events.ctq_lock);

        ASSERT(ct->ct_evcnt == 0);
}

/*
 * contract_destroy
 *
 * Explicit contract destruction. Called when the contract is empty.
 * The contract will actually stick around until all of its events are
 * removed from the bundle and process bundle queues, and all fds
 * which refer to it are closed. See contract_dtor if you are looking
 * for what destroys the contract structure.
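 *
 * A minimal sketch of the expected calling pattern (illustrative, not
 * a verbatim excerpt; the real callers are the contract types and
 * ctfs):
 *
 *        mutex_enter(&ct->ct_lock);
 *        ASSERT(ct->ct_owner == NULL);
 *        contract_destroy(ct);
 *        (returns with ct_lock dropped and one reference released)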
 */
void
contract_destroy(contract_t *ct)
{
        ASSERT(MUTEX_HELD(&ct->ct_lock));
        ASSERT(ct->ct_state != CTS_DEAD);
        ASSERT(ct->ct_owner == NULL);

        ct->ct_state = CTS_DEAD;
        cte_queue_drain(&ct->ct_events, 1);
        mutex_exit(&ct->ct_lock);
        mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
        cte_trim(&ct->ct_type->ct_type_events, ct);
        mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
        mutex_enter(&ct->ct_lock);
        ct->ct_type->ct_type_ops->contop_destroy(ct);
        mutex_exit(&ct->ct_lock);
        contract_rele(ct);
}

/*
 * contract_vnode_get
 *
 * Obtains the contract directory vnode for this contract, if there is
 * one. The caller must VN_RELE the vnode when they are through using
 * it.
 */
vnode_t *
contract_vnode_get(contract_t *ct, vfs_t *vfsp)
{
        contract_vnode_t *ctv;
        vnode_t *vp = NULL;

        mutex_enter(&ct->ct_lock);
        for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
            ctv = list_next(&ct->ct_vnodes, ctv))
                if (ctv->ctv_vnode->v_vfsp == vfsp) {
                        vp = ctv->ctv_vnode;
                        VN_HOLD(vp);
                        break;
                }
        mutex_exit(&ct->ct_lock);
        return (vp);
}

/*
 * contract_vnode_set
 *
 * Sets the contract directory vnode for this contract. We don't hold
 * a reference on the vnode because we don't want to prevent it from
 * being freed. The vnode's inactive entry point will take care of
 * notifying us when it should be removed.
 */
void
contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
{
        mutex_enter(&ct->ct_lock);
        ctv->ctv_vnode = vnode;
        list_insert_head(&ct->ct_vnodes, ctv);
        mutex_exit(&ct->ct_lock);
}

/*
 * contract_vnode_clear
 *
 * Removes this vnode as the contract directory vnode for this
 * contract. Called from a contract directory's inactive entry point,
 * this may return 0 indicating that the vnode gained another reference
 * because of a simultaneous call to contract_vnode_get.
 */
int
contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
{
        vnode_t *vp = ctv->ctv_vnode;
        int result;

        mutex_enter(&ct->ct_lock);
        mutex_enter(&vp->v_lock);
        if (vp->v_count == 1) {
                list_remove(&ct->ct_vnodes, ctv);
                result = 1;
        } else {
                vp->v_count--;
                result = 0;
        }
        mutex_exit(&vp->v_lock);
        mutex_exit(&ct->ct_lock);

        return (result);
}

/*
 * contract_exit
 *
 * Abandons all contracts held by process p, and drains process p's
 * bundle queues. Called on process exit.
 */
void
contract_exit(proc_t *p)
{
        contract_t *ct;
        void *cookie = NULL;
        int i;

        ASSERT(p == curproc);

        /*
         * Abandon held contracts. contract_abandon knows enough not
         * to remove the contract from the list a second time. We are
         * exiting, so no locks are needed here. But because
         * contract_abandon will take p_lock, we need to make sure we
         * aren't holding it.
         */
        ASSERT(MUTEX_NOT_HELD(&p->p_lock));
        while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
                VERIFY(contract_abandon(ct, p, 0) == 0);

        /*
         * Drain pbundles. Because a process bundle queue could have
         * been passed to another process, they may not be freed right
         * away.
         */
        if (p->p_ct_equeue) {
                for (i = 0; i < CTT_MAXTYPE; i++)
                        if (p->p_ct_equeue[i])
                                cte_queue_drain(p->p_ct_equeue[i], 0);
                kmem_free(p->p_ct_equeue,
                    CTT_MAXTYPE * sizeof (ct_equeue_t *));
        }
}

static int
get_time_left(struct ct_time *t)
{
        clock_t ticks_elapsed;
        int secs_elapsed;

        if (t->ctm_total == -1)
                return (-1);

        ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
        secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed) / MICROSEC);
        return (secs_elapsed > 0 ? secs_elapsed : 0);
}

/*
 * contract_status_common
 *
 * Populates a ct_status structure. Used by contract types in their
 * status entry points and ctfs when only common information is
 * requested.
 */
void
contract_status_common(contract_t *ct, zone_t *zone, void *status,
    model_t model)
{
        STRUCT_HANDLE(ct_status, lstatus);

        STRUCT_SET_HANDLE(lstatus, model, status);
        ASSERT(MUTEX_HELD(&ct->ct_lock));
        if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
            zone->zone_uniqid == ct->ct_czuniqid) {
                zone_t *czone;
                zoneid_t zoneid = -1;

                /*
                 * Contracts don't have holds on the zones they were
                 * created by. If the contract's zone no longer
                 * exists, we say its zoneid is -1.
                 */
                if (zone->zone_uniqid == ct->ct_czuniqid ||
                    ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
                        zoneid = ct->ct_zoneid;
                } else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
                        if (czone->zone_uniqid == ct->ct_mzuniqid)
                                zoneid = ct->ct_zoneid;
                        zone_rele(czone);
                }

                STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
                STRUCT_FSET(lstatus, ctst_holder,
                    (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
                    (ct->ct_state == CTS_INHERITED) ?
                    ct->ct_regent->ct_id : 0);
                STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
        } else {
                /*
                 * We are looking at a contract which was created by a
                 * process outside of our zone. We provide fake zone,
                 * holder, and state information.
                 */

                STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
                /*
                 * Since "zone" can't disappear until the calling ctfs
                 * is unmounted, zone_zsched must be valid.
                 */
                STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
                    zone->zone_zsched->p_pid : 0);
                STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
                    CTS_OWNED : ct->ct_state);
        }
        STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
        STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
        STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
        STRUCT_FSET(lstatus, ctst_nevid,
            ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
        STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
        STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
        STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
        STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
        STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
}

/*
 * contract_checkcred
 *
 * Determines if the specified contract is owned by a process with the
 * same effective uid as the specified credential. The caller must
 * ensure that the uid spaces are the same. Returns 1 on success.
 */
static int
contract_checkcred(contract_t *ct, const cred_t *cr)
{
        proc_t *p;
        int fail = 1;

        mutex_enter(&ct->ct_lock);
        if ((p = ct->ct_owner) != NULL) {
                mutex_enter(&p->p_crlock);
                fail = crgetuid(cr) != crgetuid(p->p_cred);
                mutex_exit(&p->p_crlock);
        }
        mutex_exit(&ct->ct_lock);

        return (!fail);
}

/*
 * contract_owned
 *
 * Determines if the specified credential can view an event generated
 * by the specified contract. If locked is set, the contract's ct_lock
 * is held and the caller will need to do additional work to determine
 * if they truly can see the event. Returns 1 on success.
 */
int
contract_owned(contract_t *ct, const cred_t *cr, int locked)
{
        int owner, cmatch, zmatch;
        uint64_t zuniqid, mzuniqid;
        uid_t euid;

        ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));

        zuniqid = curproc->p_zone->zone_uniqid;
        mzuniqid = contract_getzuniqid(ct);
        euid = crgetuid(cr);

        /*
         * owner:  we own the contract
         * cmatch: we are in the creator's (and holder's) zone and our
         *         uid matches the creator's or holder's
         * zmatch: we are in the effective zone of a contract created
         *         in the global zone, and our uid matches that of the
         *         virtualized holder's (zsched/kcred)
         */
        owner = (ct->ct_owner == curproc);
        cmatch = (zuniqid == ct->ct_czuniqid) &&
            ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
        zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
            (crgetuid(kcred) == euid);

        return (owner || cmatch || zmatch);
}


/*
 * contract_type_init
 *
 * Called by contract types to register themselves with the contracts
 * framework.
 */
ct_type_t *
contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
    ct_f_default_t *dfault)
{
        ct_type_t *result;

        ASSERT(type < CTT_MAXTYPE);

        result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);

        mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
        avl_create(&result->ct_type_avl, contract_compar,
            sizeof (contract_t), offsetof(contract_t, ct_cttavl));
        cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
        result->ct_type_name = name;
        result->ct_type_ops = ops;
        result->ct_type_default = dfault;
        result->ct_type_evid = 0;
        gethrestime(&result->ct_type_timestruc);
        result->ct_type_index = type;

        ct_types[type] = result;

        return (result);
}

/*
 * contract_type_count
 *
 * Obtains the number of contracts of a particular type.
 */
int
contract_type_count(ct_type_t *type)
{
        ulong_t count;

        mutex_enter(&type->ct_type_lock);
        count = avl_numnodes(&type->ct_type_avl);
        mutex_exit(&type->ct_type_lock);

        return (count);
}

/*
 * contract_type_max
 *
 * Obtains the maximum contract id of a particular type.
 */
ctid_t
contract_type_max(ct_type_t *type)
{
        contract_t *ct;
        ctid_t res;

        mutex_enter(&type->ct_type_lock);
        ct = avl_last(&type->ct_type_avl);
        res = ct ? ct->ct_id : -1;
        mutex_exit(&type->ct_type_lock);

        return (res);
}

/*
 * contract_max
 *
 * Obtains the maximum contract id.
 */
ctid_t
contract_max(void)
{
        contract_t *ct;
        ctid_t res;

        mutex_enter(&contract_lock);
        ct = avl_last(&contract_avl);
        res = ct ? ct->ct_id : -1;
        mutex_exit(&contract_lock);

        return (res);
}

/*
 * contract_lookup_common
 *
 * Common code for contract_lookup and contract_type_lookup. Takes a
 * pointer to an AVL tree to search in. Should be called with the
 * appropriate tree-protecting lock held (unfortunately unassertable).
 */
static ctid_t
contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
{
        contract_t template, *ct;
        avl_index_t where;
        ctid_t res;

        template.ct_id = current;
        ct = avl_find(tree, &template, &where);
        if (ct == NULL)
                ct = avl_nearest(tree, where, AVL_AFTER);
        if (zuniqid != GLOBAL_ZONEUNIQID)
                while (ct && (contract_getzuniqid(ct) != zuniqid))
                        ct = AVL_NEXT(tree, ct);
        res = ct ? ct->ct_id : -1;

        return (res);
}

/*
 * contract_type_lookup
 *
 * Returns the next contract of the specified type after the specified
 * id, visible from the specified zone.
 */
ctid_t
contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
{
        ctid_t res;

        mutex_enter(&type->ct_type_lock);
        res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
        mutex_exit(&type->ct_type_lock);

        return (res);
}

/*
 * contract_lookup
 *
 * Returns the next contract after the specified id, visible from the
 * specified zone.
 */
ctid_t
contract_lookup(uint64_t zuniqid, ctid_t current)
{
        ctid_t res;

        mutex_enter(&contract_lock);
        res = contract_lookup_common(&contract_avl, zuniqid, current);
        mutex_exit(&contract_lock);

        return (res);
}

/*
 * contract_plookup
 *
 * Returns the next contract held by process p after the specified id,
 * visible from the specified zone. Made complicated by the fact that
 * contracts visible in a zone but held by processes outside of the
 * zone need to appear as being held by zsched to zone members.
 */
ctid_t
contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
{
        contract_t template, *ct;
        avl_index_t where;
        ctid_t res;

        template.ct_id = current;
        if (zuniqid != GLOBAL_ZONEUNIQID &&
            (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
                /* This is inelegant. */
                mutex_enter(&contract_lock);
                ct = avl_find(&contract_avl, &template, &where);
                if (ct == NULL)
                        ct = avl_nearest(&contract_avl, where, AVL_AFTER);
                while (ct && !(ct->ct_state < CTS_ORPHAN &&
                    contract_getzuniqid(ct) == zuniqid &&
                    ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
                        ct = AVL_NEXT(&contract_avl, ct);
                res = ct ? ct->ct_id : -1;
                mutex_exit(&contract_lock);
        } else {
                mutex_enter(&p->p_lock);
                ct = avl_find(&p->p_ct_held, &template, &where);
                if (ct == NULL)
                        ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
                res = ct ? ct->ct_id : -1;
                mutex_exit(&p->p_lock);
        }

        return (res);
}

/*
 * contract_ptr_common
 *
 * Common code for contract_ptr and contract_type_ptr. Takes a pointer
 * to an AVL tree to search in. Should be called with the appropriate
 * tree-protecting lock held (unfortunately unassertable).
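 *
 * On success the contract is returned with an additional hold taken on
 * the caller's behalf; the ct_reflock dance below closes the race with
 * a contract_rele that has dropped the last reference but has not yet
 * removed the contract from the tree.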
 */
static contract_t *
contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
{
        contract_t template, *ct;

        template.ct_id = id;
        ct = avl_find(tree, &template, NULL);
        if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
            contract_getzuniqid(ct) != zuniqid)) {
                return (NULL);
        }

        /*
         * Check to see if a thread is in the window in contract_rele
         * between dropping the reference count and removing the
         * contract from the type AVL.
         */
        mutex_enter(&ct->ct_reflock);
        if (ct->ct_ref) {
                ct->ct_ref++;
                mutex_exit(&ct->ct_reflock);
        } else {
                mutex_exit(&ct->ct_reflock);
                ct = NULL;
        }

        return (ct);
}

/*
 * contract_type_ptr
 *
 * Returns a pointer to the contract with the specified id. The
 * contract is held, so the caller needs to release the reference when
 * it is through with the contract.
 */
contract_t *
contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
{
        contract_t *ct;

        mutex_enter(&type->ct_type_lock);
        ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
        mutex_exit(&type->ct_type_lock);

        return (ct);
}

/*
 * contract_ptr
 *
 * Returns a pointer to the contract with the specified id. The
 * contract is held, so the caller needs to release the reference when
 * it is through with the contract.
 */
contract_t *
contract_ptr(ctid_t id, uint64_t zuniqid)
{
        contract_t *ct;

        mutex_enter(&contract_lock);
        ct = contract_ptr_common(&contract_avl, id, zuniqid);
        mutex_exit(&contract_lock);

        return (ct);
}

/*
 * contract_type_time
 *
 * Obtains the last time a contract of a particular type was created.
 */
void
contract_type_time(ct_type_t *type, timestruc_t *time)
{
        mutex_enter(&type->ct_type_lock);
        *time = type->ct_type_timestruc;
        mutex_exit(&type->ct_type_lock);
}

/*
 * contract_type_bundle
 *
 * Obtains a type's bundle queue.
 */
ct_equeue_t *
contract_type_bundle(ct_type_t *type)
{
        return (&type->ct_type_events);
}

/*
 * contract_type_pbundle
 *
 * Obtains a process's bundle queue. If one doesn't exist, one is
 * created. Often used simply to ensure that a bundle queue is
 * allocated.
 */
ct_equeue_t *
contract_type_pbundle(ct_type_t *type, proc_t *pp)
{
        /*
         * If there isn't an array of bundle queues, allocate one.
         */
        if (pp->p_ct_equeue == NULL) {
                size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
                ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);

                mutex_enter(&pp->p_lock);
                if (pp->p_ct_equeue)
                        kmem_free(qa, size);
                else
                        pp->p_ct_equeue = qa;
                mutex_exit(&pp->p_lock);
        }

        /*
         * If there isn't a bundle queue of the required type, allocate
         * one.
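         * Note the pattern used in both steps: we allocate before
         * taking p_lock (kmem_zalloc may sleep), then re-check under
         * p_lock and discard the new allocation if another thread won
         * the race.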
         */
        if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
                ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
                cte_queue_create(q, CTEL_PBUNDLE, 20, 1);

                mutex_enter(&pp->p_lock);
                if (pp->p_ct_equeue[type->ct_type_index])
                        cte_queue_drain(q, 0);
                else
                        pp->p_ct_equeue[type->ct_type_index] = q;
                mutex_exit(&pp->p_lock);
        }

        return (pp->p_ct_equeue[type->ct_type_index]);
}

/*
 * ctmpl_free
 *
 * Frees a template.
 */
void
ctmpl_free(ct_template_t *template)
{
        mutex_destroy(&template->ctmpl_lock);
        template->ctmpl_ops->ctop_free(template);
}

/*
 * ctmpl_dup
 *
 * Creates a copy of a template.
 */
ct_template_t *
ctmpl_dup(ct_template_t *template)
{
        ct_template_t *new;

        if (template == NULL)
                return (NULL);

        new = template->ctmpl_ops->ctop_dup(template);
        /*
         * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
         * should have remained held until now.
         */
        mutex_exit(&template->ctmpl_lock);

        return (new);
}

/*
 * ctmpl_set
 *
 * Sets the requested terms of a template.
 */
int
ctmpl_set(ct_template_t *template, ct_param_t *param, const cred_t *cr)
{
        int result = 0;
        uint64_t param_value;

        if (param->ctpm_id == CTP_COOKIE ||
            param->ctpm_id == CTP_EV_INFO ||
            param->ctpm_id == CTP_EV_CRITICAL) {
                if (param->ctpm_size < sizeof (uint64_t)) {
                        return (EINVAL);
                } else {
                        param_value = *(uint64_t *)param->ctpm_value;
                }
        }

        mutex_enter(&template->ctmpl_lock);
        switch (param->ctpm_id) {
        case CTP_COOKIE:
                template->ctmpl_cookie = param_value;
                break;
        case CTP_EV_INFO:
                if (param_value & ~(uint64_t)template->ctmpl_ops->allevents)
                        result = EINVAL;
                else
                        template->ctmpl_ev_info = param_value;
                break;
        case CTP_EV_CRITICAL:
                if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) {
                        result = EINVAL;
                        break;
                } else if ((~template->ctmpl_ev_crit & param_value) == 0) {
                        /*
                         * Assume that a pure reduction of the critical
                         * set is allowed by the contract type.
                         */
                        template->ctmpl_ev_crit = param_value;
                        break;
                }
                /*
                 * There may be restrictions on what we can make
                 * critical, so we defer to the judgement of the
                 * contract type.
                 */
                /* FALLTHROUGH */
        default:
                result = template->ctmpl_ops->ctop_set(template, param, cr);
        }
        mutex_exit(&template->ctmpl_lock);

        return (result);
}

/*
 * ctmpl_get
 *
 * Obtains the requested terms from a template.
 *
 * If the term requested is a variable-sized term and the buffer
 * provided is too small for the data, we truncate the data and return
 * the buffer size necessary to fit the term in param->ctpm_size. If
 * the term requested is fixed-size (uint64_t) and the buffer provided
 * is too small, we return EINVAL. This should never happen if you're
 * using libcontract(3LIB), only if you call ioctl with a
 * hand-constructed ct_param_t argument.
 *
 * Currently, only contract-type specific parameters are
 * variable-sized.
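 *
 * As an illustrative example (a sketch, not an excerpt from any real
 * consumer), a userland program using the libcontract(3LIB) wrappers
 * rather than raw ioctls would read a fixed-size term with:
 *
 *        uint64_t cookie;
 *        int fd = open64("/system/contract/process/template", O_RDWR);
 *
 *        if (fd >= 0 && ct_tmpl_get_cookie(fd, &cookie) == 0)
 *                ... cookie now holds the template's cookie term ...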
 */
int
ctmpl_get(ct_template_t *template, ct_param_t *param)
{
        int result = 0;
        uint64_t *param_value;

        if (param->ctpm_id == CTP_COOKIE ||
            param->ctpm_id == CTP_EV_INFO ||
            param->ctpm_id == CTP_EV_CRITICAL) {
                if (param->ctpm_size < sizeof (uint64_t)) {
                        return (EINVAL);
                } else {
                        param_value = param->ctpm_value;
                        param->ctpm_size = sizeof (uint64_t);
                }
        }

        mutex_enter(&template->ctmpl_lock);
        switch (param->ctpm_id) {
        case CTP_COOKIE:
                *param_value = template->ctmpl_cookie;
                break;
        case CTP_EV_INFO:
                *param_value = template->ctmpl_ev_info;
                break;
        case CTP_EV_CRITICAL:
                *param_value = template->ctmpl_ev_crit;
                break;
        default:
                result = template->ctmpl_ops->ctop_get(template, param);
        }
        mutex_exit(&template->ctmpl_lock);

        return (result);
}

/*
 * ctmpl_makecurrent
 *
 * Used by ctmpl_activate and ctmpl_clear to set the current thread's
 * active template. Frees the old active template, if there was one.
 */
static void
ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
{
        klwp_t *curlwp = ttolwp(curthread);
        proc_t *p = curproc;
        ct_template_t *old;

        mutex_enter(&p->p_lock);
        old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
        curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
        mutex_exit(&p->p_lock);

        if (old)
                ctmpl_free(old);
}

/*
 * ctmpl_activate
 *
 * Copies the specified template as the current thread's active
 * template of that type.
 */
void
ctmpl_activate(ct_template_t *template)
{
        ctmpl_makecurrent(template, ctmpl_dup(template));
}

/*
 * ctmpl_clear
 *
 * Clears the current thread's active template of the same type as
 * the specified template.
 */
void
ctmpl_clear(ct_template_t *template)
{
        ctmpl_makecurrent(template, NULL);
}

/*
 * ctmpl_create
 *
 * Creates a new contract using the specified template.
 */
int
ctmpl_create(ct_template_t *template, ctid_t *ctidp)
{
        return (template->ctmpl_ops->ctop_create(template, ctidp));
}

/*
 * ctmpl_init
 *
 * Initializes the common portion of a new contract template.
 */
void
ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
{
        mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
        new->ctmpl_ops = ops;
        new->ctmpl_type = type;
        new->ctmpl_data = data;
        new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
        new->ctmpl_cookie = 0;
}

/*
 * ctmpl_copy
 *
 * Copies the common portions of a contract template. Intended for use
 * by a contract type's ctop_dup template op. Returns with the old
 * template's lock held, which should remain held until the template
 * op returns (it is dropped by ctmpl_dup).
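 *
 * A hypothetical contract type's dup op would therefore look roughly
 * like the following sketch (foo_template_t and foo_ctmpl are
 * illustrative names, not real ones):
 *
 *        static ct_template_t *
 *        foo_ctop_dup(ct_template_t *old)
 *        {
 *                foo_template_t *new = kmem_alloc(sizeof (*new), KM_SLEEP);
 *
 *                ctmpl_copy(&new->foo_ctmpl, old);
 *                (copy type-specific terms from old to new here;
 *                old's ctmpl_lock is held across the copy)
 *                return (&new->foo_ctmpl);
 *        }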
 */
void
ctmpl_copy(ct_template_t *new, ct_template_t *old)
{
        mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_enter(&old->ctmpl_lock);
        new->ctmpl_ops = old->ctmpl_ops;
        new->ctmpl_type = old->ctmpl_type;
        new->ctmpl_ev_crit = old->ctmpl_ev_crit;
        new->ctmpl_ev_info = old->ctmpl_ev_info;
        new->ctmpl_cookie = old->ctmpl_cookie;
}

/*
 * ctmpl_create_inval
 *
 * Returns EINVAL. Provided for the convenience of those contract
 * types which don't support ct_tmpl_create(3contract) and would
 * otherwise need to create their own stub for the ctop_create template
 * op.
 */
/*ARGSUSED*/
int
ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
{
        return (EINVAL);
}


/*
 * cte_queue_create
 *
 * Initializes a queue of a particular type. If dynamic is set, the
 * queue is to be freed when its last listener is removed after being
 * drained.
 */
static void
cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
{
        mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
        q->ctq_listno = list;
        list_create(&q->ctq_events, sizeof (ct_kevent_t),
            offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
        list_create(&q->ctq_listeners, sizeof (ct_listener_t),
            offsetof(ct_listener_t, ctl_allnode));
        list_create(&q->ctq_tail, sizeof (ct_listener_t),
            offsetof(ct_listener_t, ctl_tailnode));
        gethrestime(&q->ctq_atime);
        q->ctq_nlisteners = 0;
        q->ctq_nreliable = 0;
        q->ctq_ninf = 0;
        q->ctq_max = maxinf;

        /*
         * Bundle queues and contract queues are embedded in other
         * structures and are implicitly reference counted by virtue
         * of their vnodes' indirect hold on their contracts. Process
         * bundle queues are dynamically allocated and may persist
         * after the death of the process, so they must be explicitly
         * reference counted.
         */
        q->ctq_flags = dynamic ? CTQ_REFFED : 0;
}

/*
 * cte_queue_destroy
 *
 * Destroys the specified queue. The queue is freed if it is
 * reference counted.
 */
static void
cte_queue_destroy(ct_equeue_t *q)
{
        ASSERT(q->ctq_flags & CTQ_DEAD);
        ASSERT(q->ctq_nlisteners == 0);
        ASSERT(q->ctq_nreliable == 0);
        list_destroy(&q->ctq_events);
        list_destroy(&q->ctq_listeners);
        list_destroy(&q->ctq_tail);
        mutex_destroy(&q->ctq_lock);
        if (q->ctq_flags & CTQ_REFFED)
                kmem_free(q, sizeof (ct_equeue_t));
}

/*
 * cte_hold
 *
 * Takes a hold on the specified event.
 */
static void
cte_hold(ct_kevent_t *e)
{
        mutex_enter(&e->cte_lock);
        ASSERT(e->cte_refs > 0);
        e->cte_refs++;
        mutex_exit(&e->cte_lock);
}

/*
 * cte_rele
 *
 * Releases a hold on the specified event. If the caller had the last
 * reference, frees the event and releases its hold on the contract
 * that generated it.
 */
static void
cte_rele(ct_kevent_t *e)
{
        mutex_enter(&e->cte_lock);
        ASSERT(e->cte_refs > 0);
        if (--e->cte_refs) {
                mutex_exit(&e->cte_lock);
                return;
        }

        contract_rele(e->cte_contract);

        mutex_destroy(&e->cte_lock);
        if (e->cte_data)
                nvlist_free(e->cte_data);
        if (e->cte_gdata)
                nvlist_free(e->cte_gdata);
        kmem_free(e, sizeof (ct_kevent_t));
}

/*
 * cte_qrele
 *
 * Removes this listener's hold on the specified event, removing and
 * releasing the queue's hold on the event if appropriate.
 */
static void
cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
{
        ct_member_t *member = &e->cte_nodes[q->ctq_listno];

        ASSERT(MUTEX_HELD(&q->ctq_lock));

        if (l->ctl_flags & CTLF_RELIABLE)
                member->ctm_nreliable--;
        if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
                member->ctm_trimmed = 0;
                list_remove(&q->ctq_events, e);
                cte_rele(e);
        }
}

/*
 * cte_qmove
 *
 * Moves this listener to the specified event in the queue.
 */
static ct_kevent_t *
cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
{
        ct_kevent_t *olde;

        ASSERT(MUTEX_HELD(&q->ctq_lock));
        ASSERT(l->ctl_equeue == q);

        if ((olde = l->ctl_position) == NULL)
                list_remove(&q->ctq_tail, l);

        while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
                e = list_next(&q->ctq_events, e);

        if (e != NULL) {
                e->cte_nodes[q->ctq_listno].ctm_refs++;
                if (l->ctl_flags & CTLF_RELIABLE)
                        e->cte_nodes[q->ctq_listno].ctm_nreliable++;
        } else {
                list_insert_tail(&q->ctq_tail, l);
        }

        l->ctl_position = e;
        if (olde)
                cte_qrele(q, l, olde);

        return (e);
}

/*
 * cte_checkcred
 *
 * Determines if the specified event's contract is owned by a process
 * with the same effective uid as the specified credential. Called
 * after a failed call to contract_owned with locked set. Because it
 * drops the queue lock, its caller (cte_qreadable) needs to make sure
 * we're still in the same place after we return. Returns 1 on
 * success.
 */
static int
cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
{
        int result;
        contract_t *ct = e->cte_contract;

        cte_hold(e);
        mutex_exit(&q->ctq_lock);
        result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
            contract_checkcred(ct, cr);
        mutex_enter(&q->ctq_lock);
        cte_rele(e);

        return (result);
}

/*
 * cte_qreadable
 *
 * Ensures that the listener is pointing to a valid event that the
 * caller has the credentials to read. Returns 0 if we can read the
 * event we're pointing to.
 */
static int
cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
    uint64_t zuniqid, int crit)
{
        ct_kevent_t *e, *next;
        contract_t *ct;

        ASSERT(MUTEX_HELD(&q->ctq_lock));
        ASSERT(l->ctl_equeue == q);

        if (l->ctl_flags & CTLF_COPYOUT)
                return (1);

        next = l->ctl_position;
        while (e = cte_qmove(q, l, next)) {
                ct = e->cte_contract;
                /*
                 * Check obvious things first. If we are looking for a
                 * critical message, is this one? If we aren't in the
                 * global zone, is this message meant for us?
                 */
                if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
                    (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
                    zuniqid != contract_getzuniqid(ct))) {

                        next = list_next(&q->ctq_events, e);

                /*
                 * Next, see if our effective uid equals that of owner
                 * or author of the contract. Since we are holding the
                 * queue lock, contract_owned can't always check if we
                 * have the same effective uid as the contract's
                 * owner. If it comes to that, it fails and we take
                 * the slow(er) path.
                 */
                } else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {

                        /*
                         * At this point we either don't have any claim
                         * to this contract or we match the effective
                         * uid of the owner but couldn't tell. We
                         * first test for a NULL holder so that events
                         * from orphans and inherited contracts avoid
                         * the penalty phase.
                         */
                        if (e->cte_contract->ct_owner == NULL &&
                            !secpolicy_contract_observer_choice(cr))
                                next = list_next(&q->ctq_events, e);

                        /*
                         * cte_checkcred will juggle locks to see if we
                         * have the same uid as the event's contract's
                         * current owner. If it succeeds, we have to
                         * make sure we are in the same point in the
                         * queue.
                         */
                        else if (cte_checkcred(q, e, cr) &&
                            l->ctl_position == e)
                                break;

                        /*
                         * cte_checkcred failed; see if we're in the
                         * same place.
                         */
                        else if (l->ctl_position == e)
                                if (secpolicy_contract_observer_choice(cr))
                                        break;
                                else
                                        next = list_next(&q->ctq_events, e);

                        /*
                         * cte_checkcred failed, and our position was
                         * changed. Start from there.
                         */
                        else
                                next = l->ctl_position;
                } else {
                        break;
                }
        }

        /*
         * We check for CTLF_COPYOUT again in case we dropped the queue
         * lock in cte_checkcred.
         */
        return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
}

/*
 * cte_qwakeup
 *
 * Wakes up any waiting listeners and points them at the specified
 * event.
 */
static void
cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
{
        ct_listener_t *l;

        ASSERT(MUTEX_HELD(&q->ctq_lock));

        while (l = list_head(&q->ctq_tail)) {
                list_remove(&q->ctq_tail, l);
                e->cte_nodes[q->ctq_listno].ctm_refs++;
                if (l->ctl_flags & CTLF_RELIABLE)
                        e->cte_nodes[q->ctq_listno].ctm_nreliable++;
                l->ctl_position = e;
                cv_signal(&l->ctl_cv);
                pollwakeup(&l->ctl_pollhead, POLLIN);
        }
}

/*
 * cte_copy
 *
 * Copies events from the specified contract event queue to the
 * end of the specified process bundle queue. Only called from
 * contract_adopt.
 *
 * We copy to the end of the target queue instead of mixing the events
 * in their proper order because otherwise the act of adopting a
 * contract would require a process to reset all process bundle
 * listeners it needed to see the new events. This would, in turn,
 * require the process to keep track of which preexisting events had
 * already been processed.
 */
static void
cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
{
        ct_kevent_t *e, *first = NULL;

        ASSERT(q->ctq_listno == CTEL_CONTRACT);
        ASSERT(newq->ctq_listno == CTEL_PBUNDLE);

        mutex_enter(&q->ctq_lock);
        mutex_enter(&newq->ctq_lock);

        /*
         * For now, only copy critical events.
1997 */ 1998 for (e = list_head(&q->ctq_events); e != NULL; 1999 e = list_next(&q->ctq_events, e)) { 2000 if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) { 2001 if (first == NULL) 2002 first = e; 2003 list_insert_tail(&newq->ctq_events, e); 2004 cte_hold(e); 2005 } 2006 } 2007 2008 mutex_exit(&q->ctq_lock); 2009 2010 if (first) 2011 cte_qwakeup(newq, first); 2012 2013 mutex_exit(&newq->ctq_lock); 2014 } 2015 2016 /* 2017 * cte_trim 2018 * 2019 * Trims unneeded events from an event queue. Algorithm works as 2020 * follows: 2021 * 2022 * Removes all informative and acknowledged critical events until the 2023 * first referenced event is found. 2024 * 2025 * If a contract is specified, removes all events (regardless of 2026 * acknowledgement) generated by that contract until the first event 2027 * referenced by a reliable listener is found. Reference events are 2028 * removed by marking them "trimmed". Such events will be removed 2029 * when the last reference is dropped and will be skipped by future 2030 * listeners. 2031 * 2032 * This is pretty basic. Ideally this should remove from the middle of 2033 * the list (i.e. beyond the first referenced event), and even 2034 * referenced events. 2035 */ 2036 static void 2037 cte_trim(ct_equeue_t *q, contract_t *ct) 2038 { 2039 ct_kevent_t *e, *next; 2040 int flags, stopper; 2041 int start = 1; 2042 2043 ASSERT(MUTEX_HELD(&q->ctq_lock)); 2044 2045 for (e = list_head(&q->ctq_events); e != NULL; e = next) { 2046 next = list_next(&q->ctq_events, e); 2047 flags = e->cte_flags; 2048 stopper = (q->ctq_listno != CTEL_PBUNDLE) && 2049 (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0); 2050 if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) { 2051 if ((start && (flags & (CTE_INFO | CTE_ACK))) || 2052 (e->cte_contract == ct)) { 2053 /* 2054 * Toss informative and ACKed critical messages. 2055 */ 2056 list_remove(&q->ctq_events, e); 2057 cte_rele(e); 2058 } 2059 } else if ((e->cte_contract == ct) && !stopper) { 2060 ASSERT(q->ctq_nlisteners != 0); 2061 e->cte_nodes[q->ctq_listno].ctm_trimmed = 1; 2062 } else if (ct && !stopper) { 2063 start = 0; 2064 } else { 2065 /* 2066 * Don't free messages past the first reader. 2067 */ 2068 break; 2069 } 2070 } 2071 } 2072 2073 /* 2074 * cte_queue_drain 2075 * 2076 * Drain all events from the specified queue, and mark it dead. If 2077 * "ack" is set, acknowledge any critical events we find along the 2078 * way. 2079 */ 2080 static void 2081 cte_queue_drain(ct_equeue_t *q, int ack) 2082 { 2083 ct_kevent_t *e, *next; 2084 ct_listener_t *l; 2085 2086 mutex_enter(&q->ctq_lock); 2087 2088 for (e = list_head(&q->ctq_events); e != NULL; e = next) { 2089 next = list_next(&q->ctq_events, e); 2090 if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) { 2091 /* 2092 * Make sure critical messages are eventually 2093 * removed from the bundle queues. 2094 */ 2095 mutex_enter(&e->cte_lock); 2096 e->cte_flags |= CTE_ACK; 2097 mutex_exit(&e->cte_lock); 2098 ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock)); 2099 e->cte_contract->ct_evcnt--; 2100 } 2101 list_remove(&q->ctq_events, e); 2102 e->cte_nodes[q->ctq_listno].ctm_refs = 0; 2103 e->cte_nodes[q->ctq_listno].ctm_nreliable = 0; 2104 e->cte_nodes[q->ctq_listno].ctm_trimmed = 0; 2105 cte_rele(e); 2106 } 2107 2108 /* 2109 * This is necessary only because of CTEL_PBUNDLE listeners; 2110 * the events they point to can move from one pbundle to 2111 * another. 
/*
 * cte_queue_drain
 *
 * Drain all events from the specified queue, and mark it dead.  If
 * "ack" is set, acknowledge any critical events we find along the
 * way.
 */
static void
cte_queue_drain(ct_equeue_t *q, int ack)
{
	ct_kevent_t *e, *next;
	ct_listener_t *l;

	mutex_enter(&q->ctq_lock);

	for (e = list_head(&q->ctq_events); e != NULL; e = next) {
		next = list_next(&q->ctq_events, e);
		if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
			/*
			 * Make sure critical messages are eventually
			 * removed from the bundle queues.
			 */
			mutex_enter(&e->cte_lock);
			e->cte_flags |= CTE_ACK;
			mutex_exit(&e->cte_lock);
			ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
			e->cte_contract->ct_evcnt--;
		}
		list_remove(&q->ctq_events, e);
		e->cte_nodes[q->ctq_listno].ctm_refs = 0;
		e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
		e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
		cte_rele(e);
	}

	/*
	 * This is necessary only because of CTEL_PBUNDLE listeners;
	 * the events they point to can move from one pbundle to
	 * another.  Fortunately, this only happens if the contract is
	 * inherited, which (in turn) only happens if the process
	 * exits, which means it's an all-or-nothing deal.  If this
	 * wasn't the case, we would instead need to keep track of
	 * listeners on a per-event basis, not just a per-queue basis.
	 * This would have the side benefit of letting us clean up
	 * trimmed events sooner (i.e. immediately), but would
	 * unfortunately make events even bigger than they already
	 * are.
	 */
	for (l = list_head(&q->ctq_listeners); l;
	    l = list_next(&q->ctq_listeners, l)) {
		l->ctl_flags |= CTLF_DEAD;
		if (l->ctl_position) {
			l->ctl_position = NULL;
			list_insert_tail(&q->ctq_tail, l);
		}
		cv_broadcast(&l->ctl_cv);
	}

	/*
	 * Disallow events.
	 */
	q->ctq_flags |= CTQ_DEAD;

	/*
	 * If we represent the last reference to a reference-counted
	 * process bundle queue, free it.
	 */
	if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
		cte_queue_destroy(q);
	else
		mutex_exit(&q->ctq_lock);
}

/*
 * cte_publish
 *
 * Publishes an event to a specific queue.  Only called by
 * cte_publish_all.
 */
static void
cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp)
{
	ASSERT(MUTEX_HELD(&q->ctq_lock));

	q->ctq_atime = *tsp;

	/*
	 * Don't publish if the event is informative and there aren't
	 * any listeners, or if the queue has been shut down.
	 */
	if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
	    (q->ctq_flags & CTQ_DEAD)) {
		mutex_exit(&q->ctq_lock);
		cte_rele(e);
		return;
	}

	/*
	 * Enqueue event
	 */
	list_insert_tail(&q->ctq_events, e);

	/*
	 * Check for waiting listeners
	 */
	cte_qwakeup(q, e);

	/*
	 * Trim unnecessary events from the queue.
	 */
	cte_trim(q, NULL);
	mutex_exit(&q->ctq_lock);
}

/*
 * cte_publish_all
 *
 * Publish an event to all necessary event queues.  The event, e, must
 * be zallocated by the caller, and the event's flags and type must be
 * set.  The rest of the event's fields are initialized here.
 */
uint64_t
cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
{
	ct_equeue_t *q;
	timespec_t ts;
	uint64_t evid;
	ct_kevent_t *negev;
	int negend;

	e->cte_contract = ct;
	e->cte_data = data;
	e->cte_gdata = gdata;
	e->cte_refs = 3;
	evid = e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1);
	contract_hold(ct);

	/*
	 * For a negotiation event we set the ct->ct_nevent field of the
	 * contract for the duration of the negotiation.
	 */
	negend = 0;
	if (e->cte_flags & CTE_NEG) {
		cte_hold(e);
		ct->ct_nevent = e;
	} else if (e->cte_type == CT_EV_NEGEND) {
		negend = 1;
	}

	gethrestime(&ts);

	/*
	 * ct_evtlock simply (and only) ensures that two events sent
	 * from the same contract are delivered to all queues in the
	 * same order.
	 */
	mutex_enter(&ct->ct_evtlock);

	/*
	 * CTEL_CONTRACT - First deliver to the contract queue, acking
	 * the event if the contract has been orphaned.
	 */
	mutex_enter(&ct->ct_lock);
	mutex_enter(&ct->ct_events.ctq_lock);
	if ((e->cte_flags & CTE_INFO) == 0) {
		if (ct->ct_state >= CTS_ORPHAN)
			e->cte_flags |= CTE_ACK;
		else
			ct->ct_evcnt++;
	}
	mutex_exit(&ct->ct_lock);
	cte_publish(&ct->ct_events, e, &ts);

	/*
	 * CTEL_BUNDLE - Next deliver to the contract type's bundle
	 * queue.
	 */
	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
	cte_publish(&ct->ct_type->ct_type_events, e, &ts);

	/*
	 * CTEL_PBUNDLE - Finally, if the contract has an owner,
	 * deliver to the owner's process bundle queue.
	 */
	mutex_enter(&ct->ct_lock);
	if (ct->ct_owner) {
		/*
		 * proc_exit doesn't free event queues until it has
		 * abandoned all contracts.
		 */
		ASSERT(ct->ct_owner->p_ct_equeue);
		ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
		q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
		mutex_enter(&q->ctq_lock);
		mutex_exit(&ct->ct_lock);
		cte_publish(q, e, &ts);
	} else {
		mutex_exit(&ct->ct_lock);
		cte_rele(e);
	}

	if (negend) {
		mutex_enter(&ct->ct_lock);
		negev = ct->ct_nevent;
		ct->ct_nevent = NULL;
		cte_rele(negev);
		mutex_exit(&ct->ct_lock);
	}

	mutex_exit(&ct->ct_evtlock);

	return (evid);
}
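
/*
 * Illustrative sketch (hypothetical, not part of this file): per the
 * cte_publish_all contract above, a contract type module would
 * publish a critical event roughly like this (my_event_type and nvl
 * are invented names):
 *
 *	ct_kevent_t *e = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
 *
 *	e->cte_flags = 0;		(critical, i.e. not CTE_INFO)
 *	e->cte_type = my_event_type;
 *	evid = cte_publish_all(ct, e, nvl, NULL);
 *
 * The initial reference count of 3 taken in cte_publish_all matches
 * the three queues (contract, bundle, pbundle); each cte_publish
 * call either keeps its hold (enqueue) or drops it (cte_rele).
 */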
2234 */ 2235 mutex_enter(&ct->ct_lock); 2236 mutex_enter(&ct->ct_events.ctq_lock); 2237 if ((e->cte_flags & CTE_INFO) == 0) { 2238 if (ct->ct_state >= CTS_ORPHAN) 2239 e->cte_flags |= CTE_ACK; 2240 else 2241 ct->ct_evcnt++; 2242 } 2243 mutex_exit(&ct->ct_lock); 2244 cte_publish(&ct->ct_events, e, &ts); 2245 2246 /* 2247 * CTEL_BUNDLE - Next deliver to the contract type's bundle 2248 * queue. 2249 */ 2250 mutex_enter(&ct->ct_type->ct_type_events.ctq_lock); 2251 cte_publish(&ct->ct_type->ct_type_events, e, &ts); 2252 2253 /* 2254 * CTEL_PBUNDLE - Finally, if the contract has an owner, 2255 * deliver to the owner's process bundle queue. 2256 */ 2257 mutex_enter(&ct->ct_lock); 2258 if (ct->ct_owner) { 2259 /* 2260 * proc_exit doesn't free event queues until it has 2261 * abandoned all contracts. 2262 */ 2263 ASSERT(ct->ct_owner->p_ct_equeue); 2264 ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]); 2265 q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]; 2266 mutex_enter(&q->ctq_lock); 2267 mutex_exit(&ct->ct_lock); 2268 cte_publish(q, e, &ts); 2269 } else { 2270 mutex_exit(&ct->ct_lock); 2271 cte_rele(e); 2272 } 2273 2274 if (negend) { 2275 mutex_enter(&ct->ct_lock); 2276 negev = ct->ct_nevent; 2277 ct->ct_nevent = NULL; 2278 cte_rele(negev); 2279 mutex_exit(&ct->ct_lock); 2280 } 2281 2282 mutex_exit(&ct->ct_evtlock); 2283 2284 return (evid); 2285 } 2286 2287 /* 2288 * cte_add_listener 2289 * 2290 * Add a new listener to an event queue. 2291 */ 2292 void 2293 cte_add_listener(ct_equeue_t *q, ct_listener_t *l) 2294 { 2295 cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL); 2296 l->ctl_equeue = q; 2297 l->ctl_position = NULL; 2298 l->ctl_flags = 0; 2299 2300 mutex_enter(&q->ctq_lock); 2301 list_insert_head(&q->ctq_tail, l); 2302 list_insert_head(&q->ctq_listeners, l); 2303 q->ctq_nlisteners++; 2304 mutex_exit(&q->ctq_lock); 2305 } 2306 2307 /* 2308 * cte_remove_listener 2309 * 2310 * Remove a listener from an event queue. No other queue activities 2311 * (e.g. cte_get event) may be in progress at this endpoint when this 2312 * is called. 2313 */ 2314 void 2315 cte_remove_listener(ct_listener_t *l) 2316 { 2317 ct_equeue_t *q = l->ctl_equeue; 2318 ct_kevent_t *e; 2319 2320 mutex_enter(&q->ctq_lock); 2321 2322 ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0); 2323 2324 if ((e = l->ctl_position) != NULL) 2325 cte_qrele(q, l, e); 2326 else 2327 list_remove(&q->ctq_tail, l); 2328 l->ctl_position = NULL; 2329 2330 q->ctq_nlisteners--; 2331 list_remove(&q->ctq_listeners, l); 2332 2333 if (l->ctl_flags & CTLF_RELIABLE) 2334 q->ctq_nreliable--; 2335 2336 /* 2337 * If we are a the last listener of a dead reference counted 2338 * queue (i.e. a process bundle) we free it. Otherwise we just 2339 * trim any events which may have been kept around for our 2340 * benefit. 2341 */ 2342 if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) && 2343 (q->ctq_nlisteners == 0)) { 2344 cte_queue_destroy(q); 2345 } else { 2346 cte_trim(q, NULL); 2347 mutex_exit(&q->ctq_lock); 2348 } 2349 } 2350 2351 /* 2352 * cte_reset_listener 2353 * 2354 * Moves a listener's queue pointer to the beginning of the queue. 2355 */ 2356 void 2357 cte_reset_listener(ct_listener_t *l) 2358 { 2359 ct_equeue_t *q = l->ctl_equeue; 2360 2361 mutex_enter(&q->ctq_lock); 2362 2363 /* 2364 * We allow an asynchronous reset because it doesn't make a 2365 * whole lot of sense to make reset block or fail. We already 2366 * have most of the mechanism needed thanks to queue trimming, 2367 * so implementing it isn't a big deal. 
2368 */ 2369 if (l->ctl_flags & CTLF_COPYOUT) 2370 l->ctl_flags |= CTLF_RESET; 2371 2372 (void) cte_qmove(q, l, list_head(&q->ctq_events)); 2373 2374 /* 2375 * Inform blocked readers. 2376 */ 2377 cv_broadcast(&l->ctl_cv); 2378 pollwakeup(&l->ctl_pollhead, POLLIN); 2379 mutex_exit(&q->ctq_lock); 2380 } 2381 2382 /* 2383 * cte_next_event 2384 * 2385 * Moves the event pointer for the specified listener to the next event 2386 * on the queue. To avoid races, this movement only occurs if the 2387 * specified event id matches that of the current event. This is used 2388 * primarily to skip events that have been read but whose extended data 2389 * haven't been copied out. 2390 */ 2391 int 2392 cte_next_event(ct_listener_t *l, uint64_t id) 2393 { 2394 ct_equeue_t *q = l->ctl_equeue; 2395 ct_kevent_t *old; 2396 2397 mutex_enter(&q->ctq_lock); 2398 2399 if (l->ctl_flags & CTLF_COPYOUT) 2400 l->ctl_flags |= CTLF_RESET; 2401 2402 if (((old = l->ctl_position) != NULL) && (old->cte_id == id)) 2403 (void) cte_qmove(q, l, list_next(&q->ctq_events, old)); 2404 2405 mutex_exit(&q->ctq_lock); 2406 2407 return (0); 2408 } 2409 2410 /* 2411 * cte_get_event 2412 * 2413 * Reads an event from an event endpoint. If "nonblock" is clear, we 2414 * block until a suitable event is ready. If "crit" is set, we only 2415 * read critical events. Note that while "cr" is the caller's cred, 2416 * "zuniqid" is the unique id of the zone the calling contract 2417 * filesystem was mounted in. 2418 */ 2419 int 2420 cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr, 2421 uint64_t zuniqid, int crit) 2422 { 2423 ct_equeue_t *q = l->ctl_equeue; 2424 ct_kevent_t *temp; 2425 int result = 0; 2426 int partial = 0; 2427 size_t size, gsize, len; 2428 model_t mdl = get_udatamodel(); 2429 STRUCT_DECL(ct_event, ev); 2430 STRUCT_INIT(ev, mdl); 2431 2432 /* 2433 * cte_qreadable checks for CTLF_COPYOUT as well as ensures 2434 * that there exists, and we are pointing to, an appropriate 2435 * event. It may temporarily drop ctq_lock, but that doesn't 2436 * really matter to us. 2437 */ 2438 mutex_enter(&q->ctq_lock); 2439 while (cte_qreadable(q, l, cr, zuniqid, crit)) { 2440 if (nonblock) { 2441 result = EAGAIN; 2442 goto error; 2443 } 2444 if (q->ctq_flags & CTQ_DEAD) { 2445 result = EIDRM; 2446 goto error; 2447 } 2448 result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock); 2449 if (result == 0) { 2450 result = EINTR; 2451 goto error; 2452 } 2453 } 2454 temp = l->ctl_position; 2455 cte_hold(temp); 2456 l->ctl_flags |= CTLF_COPYOUT; 2457 mutex_exit(&q->ctq_lock); 2458 2459 /* 2460 * We now have an event. Copy in the user event structure to 2461 * see how much space we have to work with. 2462 */ 2463 result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev)); 2464 if (result) 2465 goto copyerr; 2466 2467 /* 2468 * Determine what data we have and what the user should be 2469 * allowed to see. 2470 */ 2471 size = gsize = 0; 2472 if (temp->cte_data) { 2473 VERIFY(nvlist_size(temp->cte_data, &size, 2474 NV_ENCODE_NATIVE) == 0); 2475 ASSERT(size != 0); 2476 } 2477 if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) { 2478 VERIFY(nvlist_size(temp->cte_gdata, &gsize, 2479 NV_ENCODE_NATIVE) == 0); 2480 ASSERT(gsize != 0); 2481 } 2482 2483 /* 2484 * If we have enough space, copy out the extended event data. 
2485 */ 2486 len = size + gsize; 2487 if (len) { 2488 if (STRUCT_FGET(ev, ctev_nbytes) >= len) { 2489 char *buf = kmem_alloc(len, KM_SLEEP); 2490 2491 if (size) 2492 VERIFY(nvlist_pack(temp->cte_data, &buf, &size, 2493 NV_ENCODE_NATIVE, KM_SLEEP) == 0); 2494 if (gsize) { 2495 char *tmp = buf + size; 2496 2497 VERIFY(nvlist_pack(temp->cte_gdata, &tmp, 2498 &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0); 2499 } 2500 2501 /* This shouldn't have changed */ 2502 ASSERT(size + gsize == len); 2503 result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer), 2504 len); 2505 kmem_free(buf, len); 2506 if (result) 2507 goto copyerr; 2508 } else { 2509 partial = 1; 2510 } 2511 } 2512 2513 /* 2514 * Copy out the common event data. 2515 */ 2516 STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id); 2517 STRUCT_FSET(ev, ctev_evid, temp->cte_id); 2518 STRUCT_FSET(ev, ctev_cttype, 2519 temp->cte_contract->ct_type->ct_type_index); 2520 STRUCT_FSET(ev, ctev_flags, temp->cte_flags & 2521 (CTE_ACK|CTE_INFO|CTE_NEG)); 2522 STRUCT_FSET(ev, ctev_type, temp->cte_type); 2523 STRUCT_FSET(ev, ctev_nbytes, len); 2524 STRUCT_FSET(ev, ctev_goffset, size); 2525 result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev)); 2526 2527 copyerr: 2528 /* 2529 * Only move our location in the queue if all copyouts were 2530 * successful, the caller provided enough space for the entire 2531 * event, and our endpoint wasn't reset or otherwise moved by 2532 * another thread. 2533 */ 2534 mutex_enter(&q->ctq_lock); 2535 if (result) 2536 result = EFAULT; 2537 else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) && 2538 (l->ctl_position == temp)) 2539 (void) cte_qmove(q, l, list_next(&q->ctq_events, temp)); 2540 l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET); 2541 /* 2542 * Signal any readers blocked on our CTLF_COPYOUT. 2543 */ 2544 cv_signal(&l->ctl_cv); 2545 cte_rele(temp); 2546 2547 error: 2548 mutex_exit(&q->ctq_lock); 2549 return (result); 2550 } 2551 2552 /* 2553 * cte_set_reliable 2554 * 2555 * Requests that events be reliably delivered to an event endpoint. 2556 * Unread informative and acknowledged critical events will not be 2557 * removed from the queue until this listener reads or skips them. 2558 * Because a listener could maliciously request reliable delivery and 2559 * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the 2560 * caller's effective set. 2561 */ 2562 int 2563 cte_set_reliable(ct_listener_t *l, const cred_t *cr) 2564 { 2565 ct_equeue_t *q = l->ctl_equeue; 2566 int error; 2567 2568 if ((error = secpolicy_contract_event(cr)) != 0) 2569 return (error); 2570 2571 mutex_enter(&q->ctq_lock); 2572 if ((l->ctl_flags & CTLF_RELIABLE) == 0) { 2573 l->ctl_flags |= CTLF_RELIABLE; 2574 q->ctq_nreliable++; 2575 if (l->ctl_position != NULL) 2576 l->ctl_position->cte_nodes[q->ctq_listno]. 2577 ctm_nreliable++; 2578 } 2579 mutex_exit(&q->ctq_lock); 2580 2581 return (0); 2582 } 2583