1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This file contains global data and code shared between master and slave parts 31 * of the pseudo-terminal driver. 32 * 33 * Pseudo terminals (or pt's for short) are allocated dynamically. 34 * pt's are put in the global ptms_slots array indexed by minor numbers. 35 * 36 * The slots array is initially small (of the size NPTY_MIN). When more pt's are 37 * needed than the slot array size, the larger slot array is allocated and all 38 * opened pt's move to the new one. 39 * 40 * Resource allocation: 41 * 42 * pt_ttys structures are allocated via pt_ttys_alloc, which uses 43 * kmem_cache_alloc(). 44 * Minor number space is allocated via vmem_alloc() interface. 45 * ptms_slots arrays are allocated via kmem_alloc(). 46 * 47 * Minors are started from 1 instead of 0 because vmem_alloc returns 0 in case 48 * of failure. Also, in anticipation of removing clone device interface to 49 * pseudo-terminal subsystem, minor 0 should not be used. 
(Potential future 50 * development). 51 * 52 * Device entries in /dev/pts directory are created dynamically via 53 * ddi_create_minor_node(). It enqueues requests to user-mode event daemon 54 * which actually creates entries asynchronously, so they may not be available 55 * immediately. For this reason we create devices before they are actually 56 * needed, so for each slot table extension we already have node creation 57 * requests queued. To avoid overflowing the event daemon event queue we 58 * limit the maximum extension of the slot table by the pt_maxdelta tuneable. 59 * After the table slot size reaches pt_maxdelta, we stop the 2^N extension 60 * algorithm and start extending the slot table size by pt_maxdelta. 61 * 62 * Synchronization: 63 * 64 * All global data synchronization between ptm/pts is done via global 65 * ptms_lock mutex which is implicitly initialized by declaring it global. 66 * 67 * Individual fields of pt_ttys structure (except ptm_rdq, pts_rdq and 68 * pt_nullmsg) are protected by pt_ttys.pt_lock mutex. 69 * 70 * PT_ENTER_READ/PT_ENTER_WRITE are reference counter based read-write locks 71 * which allow reader locks to be reacquired by the same thread (usual 72 * reader/writer locks can't be used for that purpose since it is illegal for 73 * a thread to acquire a lock it already holds, even as a reader). The sole 74 * purpose of these macros is to guarantee that the peer queue will not 75 * disappear (due to closing peer) while it is used. It is safe to use 76 * PT_ENTER_READ/PT_EXIT_READ brackets across calls like putq/putnext (since 77 * they are not real locks but reference counts). 78 * 79 * PT_ENTER_WRITE/PT_EXIT_WRITE brackets are used ONLY in master/slave 80 * open/close paths to modify ptm_rdq and pts_rdq fields. 
These fields should 81 * be set to appropriate queues *after* qprocson() is called during open (to 82 * prevent peer from accessing the queue with incomplete plumbing) and set to 83 * NULL before qprocsoff() is called during close. Put and service procedures 84 * use PT_ENTER_READ/PT_EXIT_READ to prevent peer closes. 85 * 86 * The pt_nullmsg field is only used in open/close routines and is also 87 * protected by PT_ENTER_WRITE/PT_EXIT_WRITE brackets to avoid extra mutex 88 * holds. 89 * 90 * Lock Ordering: 91 * 92 * If both ptms_lock and per-pty lock should be held, ptms_lock should always 93 * be entered first, followed by per-pty lock. 94 * 95 * Global functions: 96 * 97 * void ptms_init(void); 98 * 99 * Called by pts/ptm _init entry points. It performs one-time 100 * initialization needed for both pts and ptm. This initialization is done 101 * here and not in ptms_initspace because all these data structures are not 102 * needed if pseudo-terminals are not used in the system. 103 * 104 * struct pt_ttys *pt_ttys_alloc(void); 105 * 106 * Allocate new minor number and pseudo-terminal entry. May sleep. 107 * New minor number is recorded in pt_minor field of the entry returned. 108 * This routine also initializes pt_minor and pt_state fields of the new 109 * pseudo-terminal and puts a pointer to it into ptms_slots array. 110 * 111 * struct pt_ttys *ptms_minor2ptty(minor_t minor) 112 * 113 * Find pt_ttys structure by minor number. 114 * Returns NULL when minor is out of range. 115 * 116 * void ptms_close(struct pt_ttys *pt, uint_t flags_to_clear); 117 * 118 * Clear flags_to_clear in pt and if no one owns it (PTMOPEN/PTSOPEN not 119 * set) free pt entry and corresponding slot. 120 * 121 * Tuneables and configuration: 122 * 123 * pt_cnt: minimum number of pseudo-terminals in the system. The system 124 * should provide at least this number of ptys (provided sufficient 125 * memory is available). 
It is different from the older semantics 126 * of pt_cnt meaning maximum number of ptys. 127 * Set to 0 by default. 128 * 129 * pt_max_pty: Maximum number of pseudo-terminals in the system. The system 130 * should not allocate more ptys than pt_max_pty (although, it may 131 * impose stricter maximum). Zero value means no user-defined 132 * maximum. This is intended to be used as "denial-of-service" 133 * protection. 134 * Set to 0 by default. 135 * 136 * Both pt_cnt and pt_max_pty may be modified during system lifetime 137 * with their semantics preserved. 138 * 139 * pt_init_cnt: Initial size of ptms_slots array. Set to NPTY_INITIAL. 140 * 141 * pt_pctofmem: Approximate percentage of system memory that may be 142 * occupied by pty data structures. Initially set to NPTY_PERCENT. 143 * This variable is used once during initialization to estimate 144 * maximum number of ptys in the system. The actual maximum is 145 * determined as minimum of pt_max_pty and calculated value. 146 * 147 * pt_maxdelta: Maximum extension chunk of the slot table. 148 */ 149 150 151 152 #include <sys/types.h> 153 #include <sys/param.h> 154 #include <sys/termios.h> 155 #include <sys/stream.h> 156 #include <sys/stropts.h> 157 #include <sys/kmem.h> 158 #include <sys/ptms.h> 159 #include <sys/stat.h> 160 #include <sys/sunddi.h> 161 #include <sys/ddi.h> 162 #include <sys/bitmap.h> 163 #include <sys/sysmacros.h> 164 #include <sys/ddi_impldefs.h> 165 #include <sys/zone.h> 166 #ifdef DEBUG 167 #include <sys/strlog.h> 168 #endif 169 170 171 /* Initial number of ptms slots */ 172 #define NPTY_INITIAL 16 173 174 #define NPTY_PERCENT 5 175 176 /* Maximum increment of the slot table size */ 177 #define PTY_MAXDELTA 128 178 179 /* 180 * Tuneable variables. 
181 */ 182 uint_t pt_cnt = 0; /* Minimum number of ptys */ 183 size_t pt_max_pty = 0; /* Maximum number of ptys */ 184 uint_t pt_init_cnt = NPTY_INITIAL; /* Initial number of ptms slots */ 185 uint_t pt_pctofmem = NPTY_PERCENT; /* Percent of memory to use for ptys */ 186 uint_t pt_maxdelta = PTY_MAXDELTA; /* Max increment for slot table size */ 187 188 /* Other global variables */ 189 190 kmutex_t ptms_lock; /* Global data access lock */ 191 192 /* 193 * Slot array and its management variables 194 */ 195 static struct pt_ttys **ptms_slots = NULL; /* Slots for actual pt structures */ 196 static size_t ptms_nslots = 0; /* Size of slot array */ 197 static size_t ptms_ptymax = 0; /* Maximum number of ptys */ 198 static size_t ptms_inuse = 0; /* # of ptys currently allocated */ 199 static size_t ptms_bt_words = 0; /* Size of minor bitmap in words */ 200 static size_t ptms_bt_len = 0; /* Size of minor bitmap in bits */ 201 202 dev_info_t *pts_dip = NULL; /* private copy of slave devinfo ptr */ 203 204 static struct kmem_cache *ptms_cache = NULL; /* pty cache */ 205 206 static vmem_t *ptms_minor_arena = NULL; /* Arena for device minors */ 207 208 static ulong_t *ptms_bt = NULL; /* pty created minor node bitmap */ 209 210 static uint_t ptms_roundup(uint_t); 211 static int ptms_constructor(void *, void *, int); 212 static void ptms_destructor(void *, void *); 213 static minor_t ptms_grow(void); 214 215 /* 216 * Total size occupied by one pty. Each pty master/slave pair consumes one 217 * pointer for ptms_slots array, one pt_ttys structure and one empty message 218 * preallocated for pts close. 219 */ 220 221 #define PTY_SIZE (sizeof (struct pt_ttys) + \ 222 sizeof (struct pt_ttys *) + \ 223 sizeof (dblk_t)) 224 225 #ifdef DEBUG 226 int ptms_debug = 0; 227 #define PTMOD_ID 5 228 #endif 229 230 /* 231 * Clear all bits of x except the highest bit 232 */ 233 #define truncate(x) ((x) <= 2 ? 
(x) : (1 << (highbit(x) - 1))) 234 235 /* 236 * Roundup the number to the nearest power of 2 237 */ 238 static uint_t 239 ptms_roundup(uint_t x) 240 { 241 uint_t p = truncate(x); /* x with non-high bits stripped */ 242 243 /* 244 * If x is a power of 2, return x, otherwise roundup. 245 */ 246 return (p == x ? p : (p * 2)); 247 } 248 249 /* 250 * Allocate ptms_slots array and kmem cache for pt_ttys. This initialization is 251 * only called once during system lifetime. Called from ptm or pts _init 252 * routine. 253 */ 254 void 255 ptms_init(void) 256 { 257 mutex_enter(&ptms_lock); 258 259 if (ptms_slots == NULL) { 260 ptms_slots = kmem_zalloc(pt_init_cnt * 261 sizeof (struct pt_ttys *), KM_SLEEP); 262 263 ptms_cache = kmem_cache_create("pty_map", 264 sizeof (struct pt_ttys), 0, ptms_constructor, 265 ptms_destructor, NULL, NULL, NULL, 0); 266 267 /* Allocate bit map for created minor nodes */ 268 ptms_bt_len = pt_init_cnt * 2 + 1; 269 ptms_bt_words = howmany(ptms_bt_len, BT_NBIPUL); 270 ptms_bt = kmem_zalloc(sizeof (ulong_t) * ptms_bt_words, 271 KM_SLEEP); 272 273 ptms_nslots = pt_init_cnt; 274 275 /* Allocate integer space for minor numbers */ 276 ptms_minor_arena = vmem_create("ptms_minor", (void *)1, 277 ptms_nslots, 1, NULL, NULL, NULL, 0, 278 VM_SLEEP | VMC_IDENTIFIER); 279 280 /* 281 * Calculate available number of ptys - how many ptys can we 282 * allocate in pt_pctofmem % of available memory. The value is 283 * rounded up to the nearest power of 2. 284 */ 285 ptms_ptymax = ptms_roundup((pt_pctofmem * kmem_maxavail()) / 286 (100 * PTY_SIZE)); 287 } 288 mutex_exit(&ptms_lock); 289 } 290 291 static void 292 ptms_create_node(dev_info_t *devi, minor_t i) 293 { 294 char name[22]; /* For representing 64-bit minor + NUL */ 295 296 (void) snprintf(name, sizeof (name), "%d", i); 297 if (ddi_create_minor_node(devi, name, S_IFCHR, 298 i, DDI_PSEUDO, NULL) == DDI_SUCCESS) { 299 BT_SET(ptms_bt, i); 300 } 301 } 302 303 /* 304 * Create nodes in /dev/pts directory. 
305 * Called from pts_attach. 306 */ 307 int 308 ptms_create_pts_nodes(dev_info_t *devi) 309 { 310 uint_t i; 311 312 mutex_enter(&ptms_lock); 313 pts_dip = devi; 314 315 /* 316 * /dev/pts/0 is not used, but some applications may check it, so create 317 * it also. 318 * 319 * Create all minor nodes that have been pre-allocated in ptms_init(). 320 */ 321 for (i = 0; i <= pt_init_cnt * 2; i++) 322 ptms_create_node(devi, i); 323 324 mutex_exit(&ptms_lock); 325 326 return (DDI_SUCCESS); 327 } 328 329 /* 330 * Destroy nodes in /dev/pts directory. 331 * Called from pts_detach. 332 */ 333 int 334 ptms_destroy_pts_nodes(dev_info_t *devi) 335 { 336 mutex_enter(&ptms_lock); 337 ddi_remove_minor_node(devi, NULL); 338 if (ptms_bt != NULL && ptms_bt_words > 0) { 339 /* Clear bitmap since all minor nodes have been removed */ 340 bzero(ptms_bt, sizeof (ulong_t) * ptms_bt_words); 341 } 342 pts_dip = NULL; 343 mutex_exit(&ptms_lock); 344 return (DDI_SUCCESS); 345 } 346 347 /* 348 * Allocate new minor number and pseudo-terminal entry. Returns the new entry or 349 * NULL if no memory or maximum number of entries reached. 350 */ 351 struct pt_ttys * 352 pt_ttys_alloc(void) 353 { 354 minor_t dminor; 355 struct pt_ttys *pt = NULL; 356 357 mutex_enter(&ptms_lock); 358 359 /* 360 * Always try to allocate new pty when pt_cnt minimum limit is not 361 * achieved. If it is achieved, the maximum is determined by either 362 * user-specified value (if it is non-zero) or our memory estimations - 363 * whatever is less. 364 */ 365 if (ptms_inuse >= pt_cnt) { 366 /* 367 * When system achieved required minimum of ptys, check for the 368 * denial of service limits. 369 * 370 * Since pt_max_pty may be zero, the formula below is used to 371 * avoid conditional expression. It will equal to pt_max_pty if 372 * it is not zero and ptms_ptymax otherwise. 373 */ 374 size_t user_max = (pt_max_pty == 0 ? 
ptms_ptymax : pt_max_pty); 375 376 /* Do not try to allocate more than allowed */ 377 if (ptms_inuse >= min(ptms_ptymax, user_max)) { 378 mutex_exit(&ptms_lock); 379 return (NULL); 380 } 381 } 382 ptms_inuse++; 383 384 /* 385 * Allocate new minor number. If this fails, all slots are busy and 386 * we need to grow the hash. 387 */ 388 dminor = (minor_t)(uintptr_t) 389 vmem_alloc(ptms_minor_arena, 1, VM_NOSLEEP); 390 391 if (dminor == 0) { 392 /* Grow the cache and retry allocation */ 393 dminor = ptms_grow(); 394 } 395 396 if (dminor == 0) { 397 /* Not enough memory now */ 398 ptms_inuse--; 399 mutex_exit(&ptms_lock); 400 return (NULL); 401 } 402 403 if (BT_TEST(ptms_bt, dminor) == 0) { 404 /* 405 * Retry failed node creation. 406 */ 407 if (pts_dip != NULL) 408 ptms_create_node(pts_dip, dminor); 409 } 410 411 pt = kmem_cache_alloc(ptms_cache, KM_NOSLEEP); 412 if (pt == NULL) { 413 /* Not enough memory - this entry can't be used now. */ 414 vmem_free(ptms_minor_arena, (void *)(uintptr_t)dminor, 1); 415 ptms_inuse--; 416 } else { 417 pt->pt_minor = dminor; 418 pt->pt_pid = curproc->p_pid; /* For debugging */ 419 pt->pt_state = (PTMOPEN | PTLOCK); 420 pt->pt_zoneid = getzoneid(); 421 ASSERT(ptms_slots[dminor - 1] == NULL); 422 ptms_slots[dminor - 1] = pt; 423 } 424 425 mutex_exit(&ptms_lock); 426 return (pt); 427 } 428 429 /* 430 * Get pt_ttys structure by minor number. 431 * Returns NULL when minor is out of range. 432 */ 433 struct pt_ttys * 434 ptms_minor2ptty(minor_t dminor) 435 { 436 struct pt_ttys *pt = NULL; 437 438 ASSERT(mutex_owned(&ptms_lock)); 439 if ((dminor >= 1) && (dminor <= ptms_nslots) && ptms_slots != NULL) 440 pt = ptms_slots[dminor - 1]; 441 442 return (pt); 443 } 444 445 /* 446 * Close the pt and clear flags_to_clear. 447 * If pt device is not opened by someone else, free it and clear its slot. 
448 */ 449 void 450 ptms_close(struct pt_ttys *pt, uint_t flags_to_clear) 451 { 452 uint_t flags; 453 454 ASSERT(MUTEX_NOT_HELD(&ptms_lock)); 455 ASSERT(pt != NULL); 456 457 mutex_enter(&ptms_lock); 458 459 mutex_enter(&pt->pt_lock); 460 pt->pt_state &= ~flags_to_clear; 461 flags = pt->pt_state; 462 mutex_exit(&pt->pt_lock); 463 464 if (! (flags & (PTMOPEN | PTSOPEN))) { 465 /* No one owns the entry - free it */ 466 467 ASSERT(pt->ptm_rdq == NULL); 468 ASSERT(pt->pts_rdq == NULL); 469 ASSERT(pt->pt_nullmsg == NULL); 470 ASSERT(pt->pt_refcnt == 0); 471 ASSERT(pt->pt_minor <= ptms_nslots); 472 ASSERT(ptms_slots[pt->pt_minor - 1] == pt); 473 ASSERT(ptms_inuse > 0); 474 475 ptms_inuse--; 476 477 pt->pt_pid = 0; 478 479 ptms_slots[pt->pt_minor - 1] = NULL; 480 /* Return minor number to the pool of minors */ 481 vmem_free(ptms_minor_arena, (void *)(uintptr_t)pt->pt_minor, 1); 482 /* Return pt to the cache */ 483 kmem_cache_free(ptms_cache, pt); 484 } 485 mutex_exit(&ptms_lock); 486 } 487 488 /* 489 * Allocate another slot table twice as large as the original one (limited to 490 * global maximum). Migrate all pt to the new slot table and free the original 491 * one. Create more /devices entries for new devices. 
492 */ 493 static minor_t 494 ptms_grow() 495 { 496 minor_t old_size = ptms_nslots; 497 minor_t delta = MIN(pt_maxdelta, old_size); 498 minor_t new_size = old_size + delta; 499 minor_t new_delta = MIN(pt_maxdelta, new_size); 500 struct pt_ttys **ptms_old = ptms_slots; 501 struct pt_ttys **ptms_new; 502 ulong_t *new_bt; 503 size_t new_bt_words; 504 size_t new_bt_len; 505 void *vaddr; /* vmem_add return value */ 506 minor_t i; 507 508 ASSERT(MUTEX_HELD(&ptms_lock)); 509 510 DDBG("ptmopen(%d): need to grow\n", (int)ptms_inuse); 511 512 /* Allocate new ptms array */ 513 ptms_new = kmem_zalloc(new_size * sizeof (struct pt_ttys *), 514 KM_NOSLEEP); 515 if (ptms_new == NULL) 516 return ((minor_t)0); 517 518 /* Allocate new ptms bitmap */ 519 new_bt_len = ptms_bt_len + new_delta; 520 new_bt_words = howmany(new_bt_len, BT_NBIPUL); 521 new_bt = kmem_zalloc(sizeof (ulong_t) * new_bt_words, KM_NOSLEEP); 522 if (new_bt == NULL) { 523 kmem_free(ptms_new, new_size * sizeof (struct pt_ttys *)); 524 return ((minor_t)0); 525 } 526 527 /* Increase clone index space */ 528 vaddr = vmem_add(ptms_minor_arena, (void *)(uintptr_t)(old_size + 1), 529 new_size - old_size, VM_NOSLEEP); 530 531 if (vaddr == NULL) { 532 kmem_free(ptms_new, new_size * sizeof (struct pt_ttys *)); 533 kmem_free(new_bt, sizeof (ulong_t) * new_bt_words); 534 return ((minor_t)0); 535 } 536 537 /* Migrate pt entries to a new location */ 538 ptms_nslots = new_size; 539 bcopy(ptms_old, ptms_new, old_size * sizeof (struct pt_ttys *)); 540 ptms_slots = ptms_new; 541 kmem_free(ptms_old, old_size * sizeof (struct pt_ttys *)); 542 543 /* Migrate bitmap entries to a new location */ 544 bt_copy(ptms_bt, new_bt, ptms_bt_words); 545 kmem_free(ptms_bt, sizeof (ulong_t) * ptms_bt_words); 546 ptms_bt = new_bt; 547 ptms_bt_words = new_bt_words; 548 ptms_bt_len = new_bt_len; 549 550 /* 551 * Add new or previously failed /devices entries. 
552 * Devices are created asynchronously via event daemon requests, so we 553 * pre-create devices before they are actually needed. 554 * Faster performance could be obtained by keeping track of 555 * the last uncreated node, rather than searching. 556 */ 557 if (pts_dip != NULL) { 558 for (i = bt_availbit(ptms_bt, ptms_bt_len); i < ptms_bt_len; 559 i++) { 560 if (BT_TEST(ptms_bt, i) == 0) 561 ptms_create_node(pts_dip, i); 562 } 563 } 564 565 /* Allocate minor number and return it */ 566 return ((minor_t)(uintptr_t) 567 vmem_alloc(ptms_minor_arena, 1, VM_NOSLEEP)); 568 } 569 570 /*ARGSUSED*/ 571 static int 572 ptms_constructor(void *maddr, void *arg, int kmflags) 573 { 574 struct pt_ttys *pt = maddr; 575 576 pt->pts_rdq = NULL; 577 pt->ptm_rdq = NULL; 578 pt->pt_nullmsg = NULL; 579 pt->pt_pid = NULL; 580 pt->pt_minor = NULL; 581 pt->pt_refcnt = 0; 582 pt->pt_state = 0; 583 pt->pt_zoneid = GLOBAL_ZONEID; 584 585 cv_init(&pt->pt_cv, NULL, CV_DEFAULT, NULL); 586 mutex_init(&pt->pt_lock, NULL, MUTEX_DEFAULT, NULL); 587 return (0); 588 } 589 590 /*ARGSUSED*/ 591 static void 592 ptms_destructor(void *maddr, void *arg) 593 { 594 struct pt_ttys *pt = maddr; 595 596 ASSERT(pt->pt_refcnt == 0); 597 ASSERT(pt->pt_state == 0); 598 ASSERT(pt->ptm_rdq == NULL); 599 ASSERT(pt->pts_rdq == NULL); 600 601 mutex_destroy(&pt->pt_lock); 602 cv_destroy(&pt->pt_cv); 603 } 604 605 #ifdef DEBUG 606 void 607 ptms_log(char *str, uint_t arg) 608 { 609 if (ptms_debug) { 610 if (ptms_debug & 2) 611 cmn_err(CE_CONT, str, arg); 612 if (ptms_debug & 4) 613 (void) strlog(PTMOD_ID, -1, 0, SL_TRACE | SL_ERROR, 614 str, arg); 615 else 616 (void) strlog(PTMOD_ID, -1, 0, SL_TRACE, str, arg); 617 } 618 } 619 620 void 621 ptms_logp(char *str, uintptr_t arg) 622 { 623 if (ptms_debug) { 624 if (ptms_debug & 2) 625 cmn_err(CE_CONT, str, arg); 626 if (ptms_debug & 4) 627 (void) strlog(PTMOD_ID, -1, 0, SL_TRACE | SL_ERROR, 628 str, arg); 629 else 630 (void) strlog(PTMOD_ID, -1, 0, SL_TRACE, str, arg); 631 } 
632 } 633 #endif 634