1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/cred.h> 32 #include <sys/modctl.h> 33 #include <sys/vfs.h> 34 #include <sys/sysmacros.h> 35 #include <sys/cmn_err.h> 36 #include <sys/stat.h> 37 #include <sys/errno.h> 38 #include <sys/kmem.h> 39 #include <sys/file.h> 40 #include <sys/kstat.h> 41 #include <sys/port_impl.h> 42 #include <sys/task.h> 43 #include <sys/project.h> 44 45 /* 46 * Event Ports can be shared across threads or across processes. 47 * Every thread/process can use an own event port or a group of them 48 * can use a single port. A major request was also to get the ability 49 * to submit user-defined events to a port. The idea of the 50 * user-defined events is to use the event ports for communication between 51 * threads/processes (like message queues). User defined-events are queued 52 * in a port with the same priority as other event types. 53 * 54 * Events are delivered only once. 
The thread/process which is waiting
 * for events with the "highest priority" (priority here is related to the
 * internal strategy to wake up waiting threads) will retrieve the event;
 * all other threads/processes will not be notified. There is also
 * the requirement to have events which should be submitted immediately
 * to all "waiting" threads. That is the main task of the alert event.
 * The alert event is submitted by the application to a port. The port
 * changes from a standard mode to the alert mode. Now all waiting threads
 * will be awakened immediately and they will return with the alert event.
 * Threads trying to retrieve events from a port in alert mode will
 * return immediately with the alert event.
 *
 *
 * An event port is like a kernel queue, which accepts events submitted from
 * user level as well as events submitted from kernel sub-systems. Sub-systems
 * able to submit events to a port are the so-called "event sources".
 * Current event sources:
 * PORT_SOURCE_AIO   : events submitted per transaction completion from
 *                     POSIX-I/O framework.
 * PORT_SOURCE_TIMER : events submitted when a timer fires
 *                     (see timer_create(3RT)).
 * PORT_SOURCE_FD    : events submitted per file descriptor (see poll(2)).
 * PORT_SOURCE_ALERT : events submitted from user. This is not really a
 *                     single event, this is actually a port mode
 *                     (see port_alert(3c)).
 * PORT_SOURCE_USER  : events submitted by applications with
 *                     port_send(3c) or port_sendn(3c).
 *
 * There is a user API implemented in the libc library as well as a
 * kernel API implemented in port_subr.c in genunix.
 * The available user API functions are:
 * port_create()    : create a port as a file descriptor of portfs file system
 *                    The standard close(2) function closes a port.
 * port_associate() : associate a file descriptor with a port to be able to
 *                    retrieve events from that file descriptor.
89 * port_dissociate(): remove the association of a file descriptor with a port. 90 * port_alert() : set/unset a port in alert mode 91 * port_send() : send an event of type PORT_SOURCE_USER to a port 92 * port_sendn() : send an event of type PORT_SOURCE_USER to a list of ports 93 * port_get() : retrieve a single event from a port 94 * port_getn() : retrieve a list of events from a port 95 * 96 * The available kernel API functions are: 97 * port_allocate_event(): allocate an event slot/structure of/from a port 98 * port_init_event() : set event data in the event structure 99 * port_send_event() : send event to a port 100 * port_free_event() : deliver allocated slot/structure back to a port 101 * port_associate_ksource(): associate a kernel event source with a port 102 * port_dissociate_ksource(): dissociate a kernel event source from a port 103 * 104 * The libc implementation consists of small functions which pass the 105 * arguments to the kernel using the "portfs" system call. It means, all the 106 * synchronisation work is being done in the kernel. The "portfs" system 107 * call loads the portfs file system into the kernel. 108 * 109 * PORT CREATION 110 * The first function to be used is port_create() which internally creates 111 * a vnode and a portfs node. The portfs node is represented by the port_t 112 * structure, which again includes all the data necessary to control a port. 113 * port_create() returns a file descriptor, which needs to be used in almost 114 * all other event port functions. 115 * The maximum number of ports per system is controlled by the resource 116 * control: project:port-max-ids. 117 * 118 * EVENT GENERATION 119 * The second step is the triggering of events, which could be sent to a port. 120 * Every event source implements an own method to generate events for a port: 121 * PORT_SOURCE_AIO: 122 * The sigevent structure of the standard POSIX-IO functions 123 * was extended by an additional notification type. 
124 * Standard notification types: 125 * SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD 126 * Event ports introduced now SIGEV_PORT. 127 * The notification type SIGEV_PORT specifies that a structure 128 * of type port_notify_t has to be attached to the sigev_value. 129 * The port_notify_t structure contains the event port file 130 * descriptor and a user-defined pointer. 131 * Internally the AIO implementation will use the kernel API 132 * functions to allocate an event port slot per transaction (aiocb) 133 * and sent the event to the port as soon as the transaction completes. 134 * All the events submitted per transaction are of type 135 * PORT_SOURCE_AIO. 136 * PORT_SOURCE_TIMER: 137 * The timer_create() function uses the same method as the 138 * PORT_SOURCE_AIO event source. It also uses the sigevent structure 139 * to deliver the port information. 140 * Internally the timer code will allocate a single event slot/struct 141 * per timer and it will send the timer event as soon as the timer 142 * fires. If the timer-fired event is not delivered to the application 143 * before the next period elapsed, then an overrun counter will be 144 * incremented. The timer event source uses a callback function to 145 * detect the delivery of the event to the application. At that time 146 * the timer callback function will update the event overrun counter. 147 * PORT_SOURCE_FD: 148 * This event source uses the port_associate() function to allocate 149 * an event slot/struct from a port. The application defines in the 150 * events argument of port_associate() the type of events which it is 151 * interested on. 152 * The internal pollwakeup() function is used by all the file 153 * systems --which are supporting the VOP_POLL() interface- to notify 154 * the upper layer (poll(2), devpoll(7d) and now event ports) about 155 * the event triggered (see valid events in poll(2)). 156 * The pollwakeup() function forwards the event to the layer registered 157 * to receive the current event. 
158 * The port_dissociate() function can be used to free the allocated 159 * event slot from the port. Anyway, file descriptors deliver events 160 * only one time and remain deactivated until the application 161 * reactivates the association of a file descriptor with port_associate(). 162 * If an associated file descriptor is closed then the file descriptor 163 * will be dissociated automatically from the port. 164 * 165 * PORT_SOURCE_ALERT: 166 * This event type is generated when the port was previously set in 167 * alert mode using the port_alert() function. 168 * A single alert event is delivered to every thread which tries to 169 * retrieve events from a port. 170 * PORT_SOURCE_USER: 171 * This type of event is generated from user level using the port_send() 172 * function to send a user event to a port or the port_sendn() function 173 * to send an event to a list of ports. 174 * 175 * EVENT DELIVERY / RETRIEVING EVENTS 176 * Events remain in the port queue until: 177 * - the application uses port_get() or port_getn() to retrieve events, 178 * - the event source cancel the event, 179 * - the event port is closed or 180 * - the process exits. 181 * The maximal number of events in a port queue is the maximal number 182 * of event slots/structures which can be allocated by event sources. 183 * The allocation of event slots/structures is controlled by the resource 184 * control: process.port-max-events. 185 * The port_get() function retrieves a single event and the port_getn() 186 * function retrieves a list of events. 187 * Events are classified as shareable and non-shareable events across processes. 188 * Non-shareable events are invisible for the port_get(n)() functions of 189 * processes other than the owner of the event. 190 * Shareable event types are: 191 * PORT_SOURCE_USER events 192 * This type of event is unconditionally shareable and without 193 * limitations. 
If the parent process sends a user event and closes 194 * the port afterwards, the event remains in the port and the child 195 * process will still be able to retrieve the user event. 196 * PORT_SOURCE_ALERT events 197 * This type of event is shareable between processes. 198 * Limitation: The alert mode of the port is removed if the owner 199 * (process which set the port in alert mode) of the 200 * alert event closes the port. 201 * PORT_SOURCE_FD events 202 * This type of event is conditional shareable between processes. 203 * After fork(2) all forked file descriptors are shareable between 204 * the processes. The child process is allowed to retrieve events 205 * from the associated file descriptors and it can also re-associate 206 * the fd with the port. 207 * Limitations: The child process is not allowed to dissociate 208 * the file descriptor from the port. Only the 209 * owner (process) of the association is allowed to 210 * dissociate the file descriptor from the port. 211 * If the owner of the association closes the port 212 * the association will be removed. 213 * PORT_SOURCE_AIO events 214 * This type of event is not shareable between processes. 215 * PORT_SOURCE_TIMER events 216 * This type of event is not shareable between processes. 217 * 218 * FORK BEHAVIOUR 219 * On fork(2) the child process inherits all opened file descriptors from 220 * the parent process. This is also valid for port file descriptors. 221 * Associated file descriptors with a port maintain the association across the 222 * fork(2). It means, the child process gets full access to the port and 223 * it can retrieve events from all common associated file descriptors. 224 * Events of file descriptors created and associated with a port after the 225 * fork(2) are non-shareable and can only be retrieved by the same process. 
226 * 227 * If the parent or the child process closes an exported port (using fork(2) 228 * or I_SENDFD) all the file descriptors associated with the port by the 229 * process will be dissociated from the port. Events of dissociated file 230 * descriptors as well as all non-shareable events will be discarded. 231 * The other process can continue working with the port as usual. 232 * 233 * CLOSING A PORT 234 * close(2) has to be used to close a port. See FORK BEHAVIOUR for details. 235 * 236 * PORT EVENT STRUCTURES 237 * The global control structure of the event ports framework is port_control_t. 238 * port_control_t keeps track of the number of created ports in the system. 239 * The cache of the port event structures is also located in port_control_t. 240 * 241 * On port_create() the vnode and the portfs node is also created. 242 * The portfs node is represented by the port_t structure. 243 * The port_t structure manages all port specific tasks: 244 * - management of resource control values 245 * - port VOP_POLL interface 246 * - creation time 247 * - uid and gid of the port 248 * 249 * The port_t structure contains the port_queue_t structure. 250 * The port_queue_t structure contains all the data necessary for the 251 * queue management: 252 * - locking 253 * - condition variables 254 * - event counters 255 * - submitted events (represented by port_kevent_t structures) 256 * - threads waiting for event delivery (check portget_t structure) 257 * - PORT_SOURCE_FD cache (managed by the port_fdcache_t structure) 258 * - event source management (managed by the port_source_t structure) 259 * - alert mode management (check port_alert_t structure) 260 * 261 * EVENT MANAGEMENT 262 * The event port file system creates a kmem_cache for internal allocation of 263 * event port structures. 264 * 265 * 1. 
Event source association with a port:
 * The first step to do for event sources is to get associated with a port
 * using the port_associate_ksource() function or adding an entry to the
 * port_ksource_tab[]. An event source can get dissociated from a port
 * using the port_dissociate_ksource() function. An entry in the
 * port_ksource_tab[] implies that the source will be associated
 * automatically with every newly created port.
 * The event source can deliver a callback function, which is used by the
 * port to notify the event source about close(2). The idea is that
 * in such a case the event source should free all allocated resources
 * and it must return to the port all allocated slots/structures.
 * The port_close() function will wait until all allocated event
 * structures/slots are returned to the port.
 * The callback function is not necessary when the event source does not
 * maintain local resources; a second condition is that the event source
 * can guarantee that allocated event slots will be returned without
 * delay to the port (it will not block and sleep somewhere).
 *
 * 2. Reservation of an event slot / event structure
 * The event port reliability is based on the reservation of an event "slot"
 * (allocation of an event structure) by the event source as part of the
 * application call. If the maximal number of event slots is exhausted then
 * the event source can return a corresponding error code to the application.
 *
 * The port_alloc_event() function has to be used by event sources to
 * allocate an event slot (reserve an event structure). The port_alloc_event()
 * function does not block and it will return a 0 value on success or an
 * error code if it fails.
293 * An argument of port_alloc_event() is a flag which determines the behavior 294 * of the event after it was delivered to the application: 295 * PORT_ALLOC_DEFAULT : event slot becomes free after delivery to the 296 * application. 297 * PORT_ALLOC_PRIVATE : event slot remains under the control of the event 298 * source. This kind of slots can not be used for 299 * event delivery and should only be used internally 300 * by the event source. 301 * PORT_KEV_CACHED : event slot remains under the control of an event 302 * port cache. It does not become free after delivery 303 * to the application. 304 * PORT_ALLOC_SCACHED : event slot remains under the control of the event 305 * source. The event source takes the control over 306 * the slot after the event is delivered to the 307 * application. 308 * 309 * 3. Delivery of events to the event port 310 * Earlier allocated event structure/slot has to be used to deliver 311 * event data to the port. Event source has to use the function 312 * port_send_event(). The single argument is a pointer to the previously 313 * reserved event structure/slot. 314 * The portkev_events field of the port_kevent_t structure can be updated/set 315 * in two ways: 316 * 1. using the port_set_event() function, or 317 * 2. updating the portkev_events field out of the callback function: 318 * The event source can deliver a callback function to the port as an 319 * argument of port_init_event(). 320 * One of the arguments of the callback function is a pointer to the 321 * events field, which will be delivered to the application. 322 * (see Delivery of events to the application). 323 * Event structures/slots can be delivered to the event port only one time, 324 * they remain blocked until the data is delivered to the application and the 325 * slot becomes free or it is delivered back to the event source 326 * (PORT_ALLOC_SCACHED). 
The activation of the callback function mentioned above 327 * is at the same time the indicator for the event source that the event 328 * structure/slot is free for reuse. 329 * 330 * 4. Delivery of events to the application 331 * The events structures/slots delivered by event sources remain in the 332 * port queue until they are retrieved by the application or the port 333 * is closed (exit(2) also closes all opened file descriptors).. 334 * The application uses port_get() or port_getn() to retrieve events from 335 * a port. port_get() retrieves a single event structure/slot and port_getn() 336 * retrieves a list of event structures/slots. 337 * Both functions are able to poll for events and return immediately or they 338 * can specify a timeout value. 339 * Before the events are delivered to the application they are moved to a 340 * second temporary internal queue. The idea is to avoid lock collisions or 341 * contentions of the global queue lock. 342 * The global queue lock is used every time when an event source delivers 343 * new events to the port. 344 * The port_get() and port_getn() functions 345 * a) retrieve single events from the temporary queue, 346 * b) prepare the data to be passed to the application memory, 347 * c) activate the callback function of the event sources: 348 * - to get the latest event data, 349 * - the event source can free all allocated resources associated with the 350 * current event, 351 * - the event source can re-use the current event slot/structure 352 * - the event source can deny the delivery of the event to the application 353 * (e.g. because of the wrong process). 354 * d) put the event back to the temporary queue if the event delivery was denied 355 * e) repeat a) until d) as long as there are events in the queue and 356 * there is enough user space available. 
357 * 358 * The loop described above could block for a very long time the global mutex, 359 * to avoid that a second mutex was introduced to synchronized concurrent 360 * threads accessing the temporary queue. 361 */ 362 363 static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, 364 uintptr_t); 365 366 static struct sysent port_sysent = { 367 6, 368 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 369 (int (*)())portfs, 370 }; 371 372 static struct modlsys modlsys = { 373 &mod_syscallops, "event ports", &port_sysent 374 }; 375 376 #ifdef _SYSCALL32_IMPL 377 378 static int64_t 379 portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4, 380 uint32_t arg5, uint32_t arg6); 381 382 static struct sysent port_sysent32 = { 383 6, 384 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 385 (int (*)())portfs32, 386 }; 387 388 static struct modlsys modlsys32 = { 389 &mod_syscallops32, 390 "32-bit event ports syscalls", 391 &port_sysent32 392 }; 393 #endif /* _SYSCALL32_IMPL */ 394 395 static struct modlinkage modlinkage = { 396 MODREV_1, 397 &modlsys, 398 #ifdef _SYSCALL32_IMPL 399 &modlsys32, 400 #endif 401 NULL 402 }; 403 404 port_kstat_t port_kstat = { 405 { "ports", KSTAT_DATA_UINT32 } 406 }; 407 408 dev_t portdev; 409 struct vnodeops *port_vnodeops; 410 struct vfs port_vfs; 411 412 extern rctl_hndl_t rc_process_portev; 413 extern rctl_hndl_t rc_project_portids; 414 extern void aio_close_port(void *, int, pid_t, int); 415 416 /* 417 * This table contains a list of event sources which need a static 418 * association with a port (every port). 419 * The last NULL entry in the table is required to detect "end of table". 
420 */ 421 struct port_ksource port_ksource_tab[] = { 422 {PORT_SOURCE_AIO, aio_close_port, NULL, NULL}, 423 {0, NULL, NULL, NULL} 424 }; 425 426 /* local functions */ 427 static int port_getn(port_t *, port_event_t *, uint_t, uint_t *, 428 port_gettimer_t *); 429 static int port_sendn(int [], int [], uint_t, int, void *, uint_t *); 430 static int port_alert(port_t *, int, int, void *); 431 static int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *); 432 static int port_send(port_t *, int, int, void *); 433 static int port_create(int *); 434 static int port_get_alert(port_alert_t *, port_event_t *); 435 static int port_copy_event(port_event_t *, port_kevent_t *, list_t *); 436 static int *port_errorn(int *, int, int, int); 437 static int port_noshare(void *, int *, pid_t, int, void *); 438 static int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *, 439 int); 440 static void port_init(port_t *); 441 static void port_remove_alert(port_queue_t *); 442 static void port_add_ksource_local(port_t *, port_ksource_t *); 443 static void port_check_return_cond(port_queue_t *); 444 static void port_dequeue_thread(port_queue_t *, portget_t *); 445 static portget_t *port_queue_thread(port_queue_t *, uint_t); 446 static void port_kstat_init(void); 447 448 #ifdef _SYSCALL32_IMPL 449 static int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *); 450 #endif 451 452 int 453 _init(void) 454 { 455 static const fs_operation_def_t port_vfsops_template[] = { 456 NULL, NULL 457 }; 458 extern const fs_operation_def_t port_vnodeops_template[]; 459 vfsops_t *port_vfsops; 460 int error; 461 major_t major; 462 463 if ((major = getudev()) == (major_t)-1) 464 return (ENXIO); 465 portdev = makedevice(major, 0); 466 467 /* Create a dummy vfs */ 468 error = vfs_makefsops(port_vfsops_template, &port_vfsops); 469 if (error) { 470 cmn_err(CE_WARN, "port init: bad vfs ops"); 471 return (error); 472 } 473 vfs_setops(&port_vfs, port_vfsops); 474 
port_vfs.vfs_flag = VFS_RDONLY; 475 port_vfs.vfs_dev = portdev; 476 vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0); 477 478 error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops); 479 if (error) { 480 vfs_freevfsops(port_vfsops); 481 cmn_err(CE_WARN, "port init: bad vnode ops"); 482 return (error); 483 } 484 485 mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL); 486 port_control.pc_nents = 0; /* number of active ports */ 487 488 /* create kmem_cache for port event structures */ 489 port_control.pc_cache = kmem_cache_create("port_cache", 490 sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 491 492 port_kstat_init(); /* init port kstats */ 493 return (mod_install(&modlinkage)); 494 } 495 496 int 497 _info(struct modinfo *modinfop) 498 { 499 return (mod_info(&modlinkage, modinfop)); 500 } 501 502 /* 503 * System call wrapper for all port related system calls from 32-bit programs. 504 */ 505 #ifdef _SYSCALL32_IMPL 506 static int64_t 507 portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3, 508 uint32_t a4) 509 { 510 int64_t error; 511 512 switch (opcode & PORT_CODE_MASK) { 513 case PORT_GET: 514 error = portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4); 515 break; 516 case PORT_SENDN: 517 error = portfs(opcode, (uint32_t)a0, a1, a2, a3, a4); 518 break; 519 default: 520 error = portfs(opcode, a0, a1, a2, a3, a4); 521 break; 522 } 523 return (error); 524 } 525 #endif /* _SYSCALL32_IMPL */ 526 527 /* 528 * System entry point for port functions. 529 * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE). 530 * The libc uses PORT_SYS_NOPORT in functions which do not deliver a 531 * port file descriptor as first argument. 
532 */ 533 static int64_t 534 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, 535 uintptr_t a4) 536 { 537 rval_t r; 538 port_t *pp; 539 int error = 0; 540 uint_t nget; 541 file_t *fp; 542 port_gettimer_t port_timer; 543 544 r.r_vals = 0; 545 if (opcode & PORT_SYS_NOPORT) { 546 opcode &= PORT_CODE_MASK; 547 if (opcode == PORT_SENDN) { 548 error = port_sendn((int *)a0, (int *)a1, (uint_t)a2, 549 (int)a3, (void *)a4, (uint_t *)&r.r_val1); 550 if (error && (error != EIO)) 551 return ((int64_t)set_errno(error)); 552 return (r.r_vals); 553 } 554 555 if (opcode == PORT_CREATE) { 556 error = port_create(&r.r_val1); 557 if (error) 558 return ((int64_t)set_errno(error)); 559 return (r.r_vals); 560 } 561 } 562 563 /* opcodes using port as first argument (a0) */ 564 565 if ((fp = getf((int)a0)) == NULL) 566 return ((uintptr_t)set_errno(EBADF)); 567 568 if (fp->f_vnode->v_type != VPORT) { 569 releasef((int)a0); 570 return ((uintptr_t)set_errno(EBADFD)); 571 } 572 573 pp = VTOEP(fp->f_vnode); 574 575 switch (opcode & PORT_CODE_MASK) { 576 case PORT_GET: 577 { 578 /* see PORT_GETN description */ 579 struct timespec timeout; 580 581 port_timer.pgt_flags = PORTGET_ONE; 582 port_timer.pgt_loop = 0; 583 port_timer.pgt_rqtp = NULL; 584 if (a4 != NULL) { 585 port_timer.pgt_timeout = &timeout; 586 timeout.tv_sec = (time_t)a2; 587 timeout.tv_nsec = (long)a3; 588 } else { 589 port_timer.pgt_timeout = NULL; 590 } 591 do { 592 nget = 1; 593 error = port_getn(pp, (port_event_t *)a1, 1, 594 (uint_t *)&nget, &port_timer); 595 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 596 break; 597 } 598 case PORT_GETN: 599 { 600 /* 601 * port_getn() can only retrieve own or shareable events from 602 * other processes. The port_getn() function remains in the 603 * kernel until own or shareable events are available or the 604 * timeout elapses. 
605 */ 606 port_timer.pgt_flags = 0; 607 port_timer.pgt_loop = 0; 608 port_timer.pgt_rqtp = NULL; 609 port_timer.pgt_timeout = (struct timespec *)a4; 610 do { 611 nget = a3; 612 error = port_getn(pp, (port_event_t *)a1, (uint_t)a2, 613 (uint_t *)&nget, &port_timer); 614 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 615 r.r_val1 = nget; 616 r.r_val2 = error; 617 releasef((int)a0); 618 if (error && error != ETIME) 619 return ((int64_t)set_errno(error)); 620 return (r.r_vals); 621 } 622 case PORT_ASSOCIATE: 623 { 624 /* currently only PORT_SOURCE_FD is implemented */ 625 if ((int)a1 != PORT_SOURCE_FD) { 626 error = EINVAL; 627 break; 628 } 629 error = port_associate_fd(pp, (int)a1, (uintptr_t)a2, (int)a3, 630 (void *)a4); 631 break; 632 } 633 case PORT_SEND: 634 { 635 /* user-defined events */ 636 error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2); 637 break; 638 } 639 case PORT_DISPATCH: 640 { 641 /* 642 * library events, blocking 643 * Only events of type PORT_SOURCE_AIO are currently allowed. 644 */ 645 if ((int)a1 != PORT_SOURCE_AIO) { 646 error = EINVAL; 647 break; 648 } 649 error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2, 650 (uintptr_t)a3, (void *)a4); 651 break; 652 } 653 case PORT_DISSOCIATE: 654 { 655 /* currently only PORT_SOURCE_FD is implemented */ 656 if ((int)a1 != PORT_SOURCE_FD) { 657 error = EINVAL; 658 break; 659 } 660 error = port_dissociate_fd(pp, (uintptr_t)a2); 661 break; 662 } 663 case PORT_ALERT: 664 { 665 if ((int)a2) /* a2 = events */ 666 error = port_alert(pp, (int)a1, (int)a2, (void *)a3); 667 else 668 port_remove_alert(&pp->port_queue); 669 break; 670 } 671 default: 672 error = EINVAL; 673 break; 674 } 675 676 releasef((int)a0); 677 if (error) 678 return ((int64_t)set_errno(error)); 679 return (r.r_vals); 680 } 681 682 /* 683 * System call to create a port. 684 * 685 * The port_create() function creates a vnode of type VPORT per port. 
686 * The port control data is associated with the vnode as vnode private data. 687 * The port_create() function returns an event port file descriptor. 688 */ 689 static int 690 port_create(int *fdp) 691 { 692 port_t *pp; 693 vnode_t *vp; 694 struct file *fp; 695 proc_t *p = curproc; 696 697 /* initialize vnode and port private data */ 698 pp = kmem_zalloc(sizeof (port_t), KM_SLEEP); 699 700 pp->port_vnode = vn_alloc(KM_SLEEP); 701 vp = EPTOV(pp); 702 vn_setops(vp, port_vnodeops); 703 vp->v_type = VPORT; 704 vp->v_vfsp = &port_vfs; 705 vp->v_data = (caddr_t)pp; 706 707 mutex_enter(&port_control.pc_mutex); 708 /* 709 * Retrieve the maximal number of event ports allowed per system from 710 * the resource control: project.port-max-ids. 711 */ 712 mutex_enter(&p->p_lock); 713 if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p, 714 port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) { 715 mutex_exit(&p->p_lock); 716 vn_free(vp); 717 kmem_free(pp, sizeof (port_t)); 718 mutex_exit(&port_control.pc_mutex); 719 return (EAGAIN); 720 } 721 722 /* 723 * Retrieve the maximal number of events allowed per port from 724 * the resource control: process.port-max-events. 725 */ 726 pp->port_max_events = rctl_enforced_value(rc_process_portev, 727 p->p_rctls, p); 728 mutex_exit(&p->p_lock); 729 730 /* allocate a new user file descriptor and a file structure */ 731 if (falloc(vp, 0, &fp, fdp)) { 732 /* 733 * If the file table is full, free allocated resources. 
734 */ 735 vn_free(vp); 736 kmem_free(pp, sizeof (port_t)); 737 mutex_exit(&port_control.pc_mutex); 738 return (EMFILE); 739 } 740 741 /* set user file pointer */ 742 if (fdp != NULL) 743 setf(*fdp, fp); 744 mutex_exit(&fp->f_tlock); 745 746 pp->port_fd = *fdp; 747 port_control.pc_nents++; 748 p->p_portcnt++; 749 port_kstat.pks_ports.value.ui32++; 750 mutex_exit(&port_control.pc_mutex); 751 752 /* initializes port private data */ 753 port_init(pp); 754 return (0); 755 } 756 757 /* 758 * port_init() initializes event port specific data 759 */ 760 static void 761 port_init(port_t *pp) 762 { 763 port_queue_t *portq; 764 port_ksource_t *pks; 765 766 mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL); 767 portq = &pp->port_queue; 768 mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL); 769 pp->port_flags |= PORT_INIT; 770 771 /* 772 * If it is not enough memory available to satisfy a user 773 * request using a single port_getn() call then port_getn() 774 * will reduce the size of the list to PORT_MAX_LIST. 775 */ 776 pp->port_max_list = port_max_list; 777 778 /* Set timestamp entries required for fstat(2) requests */ 779 gethrestime(&pp->port_ctime); 780 pp->port_uid = crgetuid(curproc->p_cred); 781 pp->port_gid = crgetgid(curproc->p_cred); 782 783 /* initialize port queue structs */ 784 list_create(&portq->portq_list, sizeof (port_kevent_t), 785 offsetof(port_kevent_t, portkev_node)); 786 list_create(&portq->portq_get_list, sizeof (port_kevent_t), 787 offsetof(port_kevent_t, portkev_node)); 788 portq->portq_flags = 0; 789 pp->port_pid = curproc->p_pid; 790 791 /* Allocate cache skeleton for PORT_SOURCE_FD events */ 792 portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP); 793 mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL); 794 795 /* 796 * Allocate cache skeleton for association of event sources. 
797 */ 798 mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL); 799 portq->portq_scache = kmem_zalloc( 800 PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP); 801 802 /* 803 * pre-associate some kernel sources with this port. 804 * The pre-association is required to create port_source_t 805 * structures for object association. 806 * Some sources can not get associated with a port before the first 807 * object association is requested. Another reason to pre_associate 808 * a particular source with a port is because of performance. 809 */ 810 811 for (pks = port_ksource_tab; pks->pks_source != 0; pks++) 812 port_add_ksource_local(pp, pks); 813 } 814 815 /* 816 * The port_add_ksource_local() function is being used to associate 817 * event sources with every new port. 818 * The event sources need to be added to port_ksource_tab[]. 819 */ 820 static void 821 port_add_ksource_local(port_t *pp, port_ksource_t *pks) 822 { 823 port_source_t *pse; 824 port_source_t **ps; 825 826 mutex_enter(&pp->port_queue.portq_source_mutex); 827 ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)]; 828 for (pse = *ps; pse != NULL; pse = pse->portsrc_next) { 829 if (pse->portsrc_source == pks->pks_source) 830 break; 831 } 832 833 if (pse == NULL) { 834 /* associate new source with the port */ 835 pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP); 836 pse->portsrc_source = pks->pks_source; 837 pse->portsrc_close = pks->pks_close; 838 pse->portsrc_closearg = pks->pks_closearg; 839 pse->portsrc_cnt = 1; 840 841 pks->pks_portsrc = pse; 842 if (*ps != NULL) 843 pse->portsrc_next = (*ps)->portsrc_next; 844 *ps = pse; 845 } 846 mutex_exit(&pp->port_queue.portq_source_mutex); 847 } 848 849 /* 850 * The port_send() function sends an event of type "source" to a 851 * port. This function is non-blocking. An event can be sent to 852 * a port as long as the number of events per port does not achieve the 853 * maximal allowed number of events. The max. 
 * number of events per port is
 * defined by the resource control process.max-port-events.
 * This function is used by the port library function port_send()
 * and port_dispatch(). The port_send(3c) function is part of the
 * event ports API and submits events of type PORT_SOURCE_USER. The
 * port_dispatch() function is project private and it is used by library
 * functions to submit events of other types than PORT_SOURCE_USER
 * (e.g. PORT_SOURCE_AIO).
 */
static int
port_send(port_t *pp, int source, int events, void *user)
{
	port_kevent_t	*pev;
	int		error;

	/* allocate a kernel event structure charged against this port */
	error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev);
	if (error)
		return (error);

	pev->portkev_object = 0;	/* user events carry no object */
	pev->portkev_events = events;
	pev->portkev_user = user;
	pev->portkev_callback = NULL;	/* no delivery callback */
	pev->portkev_arg = NULL;
	pev->portkev_flags = 0;

	error = port_send_event(pev);
	if (error) {
		/* delivery failed: return the event to the port cache */
		port_free_event_local(pev, 0);
		return (error);
	}
	return (0);
}

/*
 * The port_noshare() function returns 0 if the current event was generated
 * by the same process. Otherwise it returns a value other than 0 and the
 * event should not be delivered to the current process.
 * The port_noshare() function is normally used by the port_dispatch()
 * function. The port_dispatch() function is project private and can only be
 * used within the event port project.
 * Currently the libaio uses the port_dispatch() function to deliver events
 * of types PORT_SOURCE_AIO.
 */
/* ARGSUSED */
static int
port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	/* deny delivery when the retrieving process is not the creator */
	if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid)
		return (1);
	return (0);
}

/*
 * The port_dispatch_event() function is project private and it is used by
 * libraries involved in the project to deliver events to the port.
 * port_dispatch will sleep and wait for enough resources to satisfy the
 * request, if necessary.
 * The library can specify if the delivered event is shareable with other
 * processes (see PORT_SYS_NOSHARE flag).
 */
static int
port_dispatch_event(port_t *pp, int opcode, int source, int events,
	uintptr_t object, void *user)
{
	port_kevent_t	*pev;
	int		error;

	/* may block until an event slot is available for this port */
	error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev);
	if (error)
		return (error);

	pev->portkev_object = object;
	pev->portkev_events = events;
	pev->portkev_user = user;
	pev->portkev_arg = NULL;
	if (opcode & PORT_SYS_NOSHARE) {
		/* only the originating process may retrieve this event */
		pev->portkev_flags = PORT_KEV_NOSHARE;
		pev->portkev_callback = port_noshare;
	} else {
		pev->portkev_flags = 0;
		pev->portkev_callback = NULL;
	}

	error = port_send_event(pev);
	if (error) {
		/* delivery failed: return the event to the port cache */
		port_free_event_local(pev, 0);
		return (error);
	}
	return (0);
}


/*
 * The port_sendn() function is the kernel implementation of the event
 * port API function port_sendn(3c).
 * This function is able to send an event to a list of event ports.
 */
static int
port_sendn(int ports[], int errors[], uint_t nent, int events, void *user,
	uint_t *nget)
{
	port_kevent_t	*pev;
	int		errorcnt = 0;
	int		error = 0;
	int		count;
	int		port;
	int		*plist;
	int		*elist = NULL;
	file_t		*fp;
	port_t		*pp;

	if (nent == 0 || nent > port_max_list)
		return (EINVAL);

	/* copy in the list of port file descriptors */
	plist = kmem_alloc(nent * sizeof (int), KM_SLEEP);
	if (copyin((void *)ports, plist, nent * sizeof (int))) {
		kmem_free(plist, nent * sizeof (int));
		return (EFAULT);
	}

	/*
	 * Scan the list for event port file descriptors and send the
	 * attached user event data embedded in an event of type
	 * PORT_SOURCE_USER to every event port in the list.
	 * If a list entry is not a valid event port then the corresponding
	 * error code will be stored in the errors[] list with the same
	 * list offset as in the ports[] list.
	 */
	for (count = 0; count < nent; count++) {
		port = plist[count];
		if ((fp = getf(port)) == NULL) {
			/* not an open file descriptor */
			elist = port_errorn(elist, nent, EBADF, count);
			errorcnt++;
			continue;
		}

		pp = VTOEP(fp->f_vnode);
		if (fp->f_vnode->v_type != VPORT) {
			/* open fd, but not an event port */
			releasef(port);
			elist = port_errorn(elist, nent, EBADFD, count);
			errorcnt++;
			continue;
		}

		error = port_alloc_event_local(pp, PORT_SOURCE_USER,
		    PORT_ALLOC_DEFAULT, &pev);
		if (error) {
			/* port event limit reached for this port */
			releasef(port);
			elist = port_errorn(elist, nent, error, count);
			errorcnt++;
			continue;
		}

		pev->portkev_object = 0;
		pev->portkev_events = events;
		pev->portkev_user = user;
		pev->portkev_callback = NULL;
		pev->portkev_arg = NULL;
		pev->portkev_flags = 0;

		(void) port_send_event(pev);
		releasef(port);
	}
	if (errorcnt) {
		/* at least one entry failed: report per-fd error codes */
		error = EIO;
		if (copyout(elist, (void *)errors, nent * sizeof (int)))
			error = EFAULT;
		kmem_free(elist, nent * sizeof (int));
	}
	*nget = nent - errorcnt;	/* # of ports the event was sent to */
	kmem_free(plist, nent * sizeof (int));
	return (error);
}

/*
 * Allocate (on first use) and fill the per-fd error list returned by
 * port_sendn(); entries not touched remain 0 (no error).
 */
static int *
port_errorn(int *elist, int nent, int error, int index)
{
	if (elist == NULL)
		elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP);
	elist[index] = error;
	return (elist);
}

/*
 * port_alert()
 * The port_alert() function is a high priority event and it is always set
 * on top of the queue. It is also delivered as a single event.
 * flags:
 *	- SET	: overwrite current alert data
 *	- UPDATE: set alert data or return EBUSY if alert mode is already set
 *
 * - set the ALERT flag
 * - wakeup all sleeping threads
 */
static int
port_alert(port_t *pp, int flags, int events, void *user)
{
	port_queue_t	*portq;
	portget_t	*pgetp;
	port_alert_t	*pa;

	if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID)
		return (EINVAL);

	portq = &pp->port_queue;
	pa = &portq->portq_alert;
	mutex_enter(&portq->portq_mutex);

	/* check alert conditions */
	if (flags == PORT_ALERT_UPDATE) {
		if (portq->portq_flags & PORTQ_ALERT) {
			/* UPDATE does not overwrite an existing alert */
			mutex_exit(&portq->portq_mutex);
			return (EBUSY);
		}
	}

	/*
	 * Store alert data in the port to be delivered to threads
	 * which are using port_get(n) to retrieve events.
	 */
	portq->portq_flags |= PORTQ_ALERT;
	pa->portal_events = events;		/* alert info */
	pa->portal_pid = curproc->p_pid;	/* process owner */
	pa->portal_object = 0;			/* no object */
	pa->portal_user = user;			/* user alert data */

	/* alert and deliver alert data to waiting threads */
	pgetp = portq->portq_thread;
	if (pgetp == NULL) {
		/* no threads waiting for events */
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	/*
	 * Set waiting threads in alert mode (PORTGET_ALERT).
	 * Every thread waiting for events already allocated a portget_t
	 * structure to sleep on.
	 * The port alert arguments are stored in the portget_t structure.
	 * The PORTGET_ALERT flag is set to indicate the thread to return
	 * immediately with the alert event.
	 * The wait queue is a circular list, so iterate until we are back
	 * at the head.
	 */
	do {
		if ((pgetp->portget_state & PORTGET_ALERT) == 0) {
			pa = &pgetp->portget_alert;
			pa->portal_events = events;
			pa->portal_object = 0;
			pa->portal_user = user;
			pgetp->portget_state |= PORTGET_ALERT;
			cv_signal(&pgetp->portget_cv);
		}
	} while ((pgetp = pgetp->portget_next) != portq->portq_thread);
	mutex_exit(&portq->portq_mutex);
	return (0);
}

/*
 * Clear alert state of the port
 */
static void
port_remove_alert(port_queue_t *portq)
{
	mutex_enter(&portq->portq_mutex);
	portq->portq_flags &= ~PORTQ_ALERT;
	mutex_exit(&portq->portq_mutex);
}

/*
 * The port_getn() function is used to retrieve events from a port.
 *
 * The port_getn() function returns immediately if there are enough events
 * available in the port to satisfy the request or if the port is in alert
 * mode (see port_alert(3c)).
 * The timeout argument of port_getn(3c) -which is embedded in the
 * port_gettimer_t structure- specifies if the system call should block or
 * if it should return immediately depending on the number of events
 * available.
 * This function is internally used by port_getn(3c) as well as by
 * port_get(3c).
 */
static int
port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget,
	port_gettimer_t *pgt)
{
	port_queue_t	*portq;
	port_kevent_t	*pev;
	port_kevent_t	*lev;
	int		error = 0;
	uint_t		nmax;
	uint_t		nevents;
	uint_t		eventsz;
	port_event_t	*kevp;
	list_t		*glist;
	uint_t		tnent;
	int		rval;
	int		blocking = -1;	/* -1 = timeout not examined yet */
	int		timecheck;
	int		flag;
	timespec_t	rqtime;
	timespec_t	*rqtp = NULL;
	portget_t	*pgetp;
	void		*results;
	model_t		model = get_udatamodel();

	flag = pgt->pgt_flags;

	/* requesting more events than the user list can hold is an error */
	if (*nget > max && max > 0)
		return (EINVAL);

	portq = &pp->port_queue;
	mutex_enter(&portq->portq_mutex);
	if (max == 0) {
		/*
		 * Return number of objects with events.
		 * The portq_block_mutex is required to synchronize this
		 * thread with another possible thread, which could be
		 * retrieving events from the port queue.
		 */
		mutex_enter(&portq->portq_block_mutex);
		/*
		 * Check if a second thread is currently retrieving events
		 * and it is using the temporary event queue.
		 */
		if (portq->portq_tnent) {
			/* put remaining events back to the port queue */
			port_push_eventq(portq);
		}
		*nget = portq->portq_nent;
		mutex_exit(&portq->portq_block_mutex);
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	if (uevp == NULL) {
		mutex_exit(&portq->portq_mutex);
		return (EFAULT);
	}
	if (*nget == 0) {		/* no events required */
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	/* port is being closed ... */
	if (portq->portq_flags & PORTQ_CLOSE) {
		mutex_exit(&portq->portq_mutex);
		return (EBADFD);
	}

	/* return immediately if port in alert mode */
	if (portq->portq_flags & PORTQ_ALERT) {
		error = port_get_alert(&portq->portq_alert, uevp);
		if (error == 0)
			*nget = 1;
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

	portq->portq_thrcnt++;

	/*
	 * Now check if the completed events satisfy the
	 * "wait" requirements of the current thread:
	 */
	if (pgt->pgt_loop) {
		/*
		 * loop entry of same thread
		 * pgt_loop is set when the current thread returns
		 * prematurely from this function. That could happen
		 * when a port is being shared between processes and
		 * this thread could not find events to return.
		 * A thread is not allowed to retrieve non-shareable
		 * events generated in other processes.
		 * PORTQ_WAIT_EVENTS is set when a thread already
		 * checked the current event queue and no new events
		 * are added to the queue.
		 */
		if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) &&
		    (portq->portq_nent >= *nget)) {
			/* some new events arrived ...check them */
			goto portnowait;
		}
		/* reuse the absolute timeout computed on the first pass */
		rqtp = pgt->pgt_rqtp;
		timecheck = pgt->pgt_timecheck;
		pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
	} else {
		/* check if enough events are available ... */
		if (portq->portq_nent >= *nget)
			goto portnowait;
		/*
		 * There are not enough events available to satisfy
		 * the request, check timeout value and wait for
		 * incoming events.
		 */
		error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp,
		    &blocking, flag);
		if (error) {
			port_check_return_cond(portq);
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		if (blocking == 0)	/* don't block, check fired events */
			goto portnowait;

		if (rqtp != NULL) {
			/* convert the relative timeout to an absolute one */
			timespec_t	now;
			timecheck = timechanged;
			gethrestime(&now);
			timespecadd(rqtp, &now);
		}
	}

	/* enqueue thread in the list of waiting threads */
	pgetp = port_queue_thread(portq, *nget);


	/* Wait here until return conditions met */
	for (;;) {
		if (pgetp->portget_state & PORTGET_ALERT) {
			/* reap alert event and return */
			error = port_get_alert(&pgetp->portget_alert, uevp);
			if (error)
				*nget = 0;
			else
				*nget = 1;
			port_dequeue_thread(&pp->port_queue, pgetp);
			portq->portq_thrcnt--;
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		/*
		 * Check if some other thread is already retrieving
		 * events (portq_getn > 0).
		 */
		if ((portq->portq_getn == 0) &&
		    ((portq)->portq_nent >= *nget) &&
		    (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) ||
		    !((portq)->portq_flags & PORTQ_WAIT_EVENTS)))
			break;

		if (portq->portq_flags & PORTQ_CLOSE) {
			error = EBADFD;
			break;
		}

		rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
		    rqtp, timecheck);

		if (rval <= 0) {
			/* signal (0) or timeout (<0) interrupted the wait */
			error = (rval == 0) ? EINTR : ETIME;
			break;
		}
	}

	/* take thread out of the wait queue */
	port_dequeue_thread(portq, pgetp);

	if (error != 0 && (error == EINTR || error == EBADFD ||
	    (error == ETIME && flag))) {
		/* return without events */
		port_check_return_cond(portq);
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

portnowait:
	/* copy out at most as many events as are currently queued */
	nmax = max < portq->portq_nent ? max : portq->portq_nent;

	/*
	 * Move port event queue to a temporary event queue.
	 * New incoming events will continue to be posted to the event queue
	 * and they will not be considered by the current thread.
	 * The idea is to avoid lock contention or frequent locking/unlocking
	 * of the port queue mutex. The contention and performance degradation
	 * could happen because:
	 * a) incoming events use the port queue mutex to enqueue new events
	 *    and
	 * b) before the event can be delivered to the application it is
	 *    necessary to notify the event sources about the event delivery.
	 *    Sometimes the event sources can require a long time to return
	 *    and the queue mutex would block incoming events.
	 * During this time incoming events (port_send_event()) do not need
	 * to awake threads waiting for events. Before the current thread
	 * returns it will check the conditions to awake other waiting
	 * threads.
	 */
	portq->portq_getn++;	/* number of threads retrieving events */
	mutex_enter(&portq->portq_block_mutex); /* block other threads here */
	if (portq->portq_tnent) {
		/*
		 * Move remaining events from previous thread back to the
		 * port event queue.
		 */
		port_push_eventq(portq);
	}
	/* move port event queue to a temporary queue */
	list_move_tail(&portq->portq_get_list, &portq->portq_list);
	glist = &portq->portq_get_list;	/* use temporary event queue */
	tnent = portq->portq_nent;	/* get current number of events */
	portq->portq_nent = 0;		/* no events in the port event queue */
	portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */
	mutex_exit(&portq->portq_mutex); /* event queue can be reused now */

	if (model == DATAMODEL_NATIVE) {
		eventsz = sizeof (port_event_t);
		/* try a non-sleeping allocation, fall back to a smaller list */
		kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
		if (kevp == NULL) {
			if (nmax > pp->port_max_list)
				nmax = pp->port_max_list;
			kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
		}
		results = kevp;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event(&kevp[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;	/* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#ifdef	_SYSCALL32_IMPL
	} else {
		port_event32_t *kevp32;

		eventsz = sizeof (port_event32_t);
		/* try a non-sleeping allocation, fall back to a smaller list */
		kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
		if (kevp32 == NULL) {
			if (nmax > pp->port_max_list)
				nmax = pp->port_max_list;
			kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
		}
		results = kevp32;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event32(&kevp32[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;	/* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#endif	/* _SYSCALL32_IMPL */
	}

	/*
	 * Remember number of remaining events in the temporary event queue.
	 */
	portq->portq_tnent = tnent - nevents;
	mutex_exit(&portq->portq_block_mutex);

	/*
	 * Work to do before return :
	 * - push list of remaining events back to the top of the standard
	 *   port queue.
	 * - if this is the last thread calling port_get(n) then wakeup the
	 *   thread waiting on close(2).
	 * - check for a deferred cv_signal from port_send_event() and wakeup
	 *   the sleeping thread.
	 */
	mutex_enter(&portq->portq_mutex);
	if (portq->portq_tnent) {
		/*
		 * move remaining events in the temporary event queue back
		 * to the port event queue
		 */
		port_push_eventq(portq);
	}
	portq->portq_getn--;	/* update # of threads retrieving events */
	if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */
		/* Last thread => check close(2) conditions ... */
		if (portq->portq_flags & PORTQ_CLOSE) {
			cv_signal(&portq->portq_closecv);
			mutex_exit(&portq->portq_mutex);
			kmem_free(results, eventsz * nmax);
			/* do not copyout events */
			*nget = 0;
			return (EBADFD);
		}
	} else if (portq->portq_getn == 0) {
		/*
		 * no other threads retrieving events ...
		 * check wakeup conditions of sleeping threads
		 */
		if ((portq->portq_thread != NULL) &&
		    (portq->portq_nent >= portq->portq_nget))
			cv_signal(&portq->portq_thread->portget_cv);
	}

	/*
	 * Check PORTQ_POLLIN here because the current thread set temporarily
	 * the number of events in the queue to zero.
	 */
	if (portq->portq_flags & PORTQ_POLLIN) {
		portq->portq_flags &= ~PORTQ_POLLIN;
		mutex_exit(&portq->portq_mutex);
		pollwakeup(&pp->port_pollhd, POLLIN);
	} else {
		mutex_exit(&portq->portq_mutex);
	}

	/* now copyout list of user event structures to user space */
	if (nevents) {
		if (copyout(results, uevp, nevents * eventsz))
			error = EFAULT;
	}
	kmem_free(results, eventsz * nmax);

	if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) {
		/* no events retrieved: check loop conditions */
		if (blocking == -1) {
			/* no timeout checked */
			error = port_get_timeout(pgt->pgt_timeout,
			    &pgt->pgt_rqtime, &rqtp, &blocking, flag);
			if (error) {
				*nget = nevents;
				return (error);
			}
			if (rqtp != NULL) {
				/* make the timeout absolute for the retry */
				timespec_t	now;
				pgt->pgt_timecheck = timechanged;
				gethrestime(&now);
				timespecadd(&pgt->pgt_rqtime, &now);
			}
			pgt->pgt_rqtp = rqtp;
		} else {
			/* timeout already checked -> remember values */
			pgt->pgt_rqtp = rqtp;
			if (rqtp != NULL) {
				pgt->pgt_timecheck = timecheck;
				pgt->pgt_rqtime = *rqtp;
			}
		}
		if (blocking)
			/* timeout remaining */
			pgt->pgt_loop = 1;
	}

	/* set number of user event structures completed */
	*nget = nevents;
	return (error);
}

/*
 * 1. copy kernel event structure to user event structure.
 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
 * 4. Other types of event structures can be delivered back to the port cache
 *    (port_free_event_local()).
 * 5. The event source callback function is the last opportunity for the
 *    event source to update events, to free local resources associated with
 *    the event or to deny the delivery of the event.
 */
static int
port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list)
{
	int	free_event = 0;
	int	flags;
	int	error;

	puevp->portev_source = pkevp->portkev_source;
	puevp->portev_object = pkevp->portkev_object;
	puevp->portev_user = pkevp->portkev_user;
	puevp->portev_events = pkevp->portkev_events;

	/* remove event from the queue */
	list_remove(list, pkevp);

	/*
	 * Events of type PORT_KEV_WIRED remain allocated by the
	 * event source.
	 */
	flags = pkevp->portkev_flags;	/* saved for rollback on callback error */
	if (pkevp->portkev_flags & PORT_KEV_WIRED)
		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
	else
		free_event = 1;

	if (pkevp->portkev_callback) {
		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
		    &puevp->portev_events, pkevp->portkev_pid,
		    PORT_CALLBACK_DEFAULT, pkevp);

		if (error) {
			/*
			 * Event can not be delivered.
			 * Caller must reinsert the event into the queue.
			 */
			pkevp->portkev_flags = flags;
			return (error);
		}
	}
	if (free_event)
		port_free_event_local(pkevp, 0);
	return (0);
}

#ifdef	_SYSCALL32_IMPL
/*
 * 32-bit variant of port_copy_event() for 32-bit callers on a 64-bit
 * kernel.
 * 1. copy kernel event structure to user event structure.
 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
 * 4. Other types of event structures can be delivered back to the port cache
 *    (port_free_event_local()).
 * 5. The event source callback function is the last opportunity for the
 *    event source to update events, to free local resources associated with
 *    the event or to deny the delivery of the event.
 */
static int
port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list)
{
	int	free_event = 0;
	int	error;
	int	flags;

	puevp->portev_source = pkevp->portkev_source;
	/* narrow 64-bit kernel fields to the 32-bit user layout */
	puevp->portev_object = (daddr32_t)pkevp->portkev_object;
	puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user;
	puevp->portev_events = pkevp->portkev_events;

	/* remove event from the queue */
	list_remove(list, pkevp);

	/*
	 * Events of type PORT_KEV_WIRED remain allocated by the
	 * sub-system (source).
	 */
	flags = pkevp->portkev_flags;	/* saved for rollback on callback error */
	if (pkevp->portkev_flags & PORT_KEV_WIRED)
		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
	else
		free_event = 1;

	if (pkevp->portkev_callback != NULL) {
		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
		    &puevp->portev_events, pkevp->portkev_pid,
		    PORT_CALLBACK_DEFAULT, pkevp);
		if (error) {
			/*
			 * Event can not be delivered.
			 * Caller must reinsert the event into the queue.
			 */
			pkevp->portkev_flags = flags;
			return (error);
		}
	}
	if (free_event)
		port_free_event_local(pkevp, 0);
	return (0);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * copyout alert event.
 */
static int
port_get_alert(port_alert_t *pa, port_event_t *uevp)
{
	model_t	model = get_udatamodel();

	/* copyout alert event structures to user space */
	if (model == DATAMODEL_NATIVE) {
		port_event_t	uev;
		uev.portev_source = PORT_SOURCE_ALERT;
		uev.portev_object = pa->portal_object;
		uev.portev_events = pa->portal_events;
		uev.portev_user = pa->portal_user;
		if (copyout(&uev, uevp, sizeof (port_event_t)))
			return (EFAULT);
#ifdef	_SYSCALL32_IMPL
	} else {
		port_event32_t	uev32;
		uev32.portev_source = PORT_SOURCE_ALERT;
		uev32.portev_object = (daddr32_t)pa->portal_object;
		uev32.portev_events = pa->portal_events;
		/*
		 * NOTE(review): portev_user is cast through daddr32_t here
		 * while port_copy_event32() uses caddr32_t for the same
		 * field -- both are 32-bit, but confirm the intended type.
		 */
		uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user;
		if (copyout(&uev32, uevp, sizeof (port_event32_t)))
			return (EFAULT);
#endif	/* _SYSCALL32_IMPL */
	}
	return (0);
}

/*
 * Check return conditions :
 * - pending port close(2)
 * - threads waiting for events
 */
static void
port_check_return_cond(port_queue_t *portq)
{
	ASSERT(MUTEX_HELD(&portq->portq_mutex));
	portq->portq_thrcnt--;
	if (portq->portq_flags & PORTQ_CLOSE) {
		/* wake close(2) if we were the last waiter, else the next */
		if (portq->portq_thrcnt == 0)
			cv_signal(&portq->portq_closecv);
		else
			cv_signal(&portq->portq_thread->portget_cv);
	}
}

/*
 * The port_get_kevent() function returns
 * - the event located at the head of the queue if 'last' pointer is NULL
 * - the next event after the event pointed by 'last'
 * The caller of this function is responsible for the integrity of the queue
 * in use:
 * - port_getn() is using a temporary queue protected with
 *   portq->portq_block_mutex
 * - port_close_events() is working on the global event queue and protects
 *   the queue with portq->portq_mutex.
 */

port_kevent_t *
port_get_kevent(list_t *list, port_kevent_t *last)
{
	if (last == NULL)
		return (list_head(list));
	else
		return (list_next(list, last));
}

/*
 * The port_get_timeout() function gets the timeout data from user space
 * and converts that info into a corresponding internal representation.
 * The kerneldata flag means that the timeout data is already loaded.
 * On return:
 * - *blocking is 1 (block, *rqtp set to the validated timeout or NULL for
 *   an indefinite wait) or 0 (polling mode, do not block).
 */
static int
port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp,
	int *blocking, int kerneldata)
{
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timeout == NULL) {
		/* no timeout specified: block indefinitely */
		*blocking = 1;
		return (0);
	}

	if (kerneldata) {
		/* timeout value already resides in kernel memory */
		*rqtime = *timeout;
	} else {
		if (model == DATAMODEL_NATIVE) {
			if (copyin(timeout, rqtime, sizeof (*rqtime)))
				return (EFAULT);
#ifdef	_SYSCALL32_IMPL
		} else {
			/* convert the 32-bit timespec into native format */
			timespec32_t	wait_time_32;
			if (copyin(timeout, &wait_time_32,
			    sizeof (wait_time_32)))
				return (EFAULT);
			TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
#endif	/* _SYSCALL32_IMPL */
		}
	}

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
		/* zero timeout: polling mode */
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;
	return (0);
}

/*
 * port_queue_thread()
 * Threads requiring more events than available will be put in a wait queue.
 * There is a "thread wait queue" per port.
 * Threads requiring less events get a higher priority than others and they
 * will be awoken first.
 */
static portget_t *
port_queue_thread(port_queue_t *portq, uint_t nget)
{
	portget_t	*pgetp;
	portget_t	*ttp;
	portget_t	*htp;

	pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP);
	pgetp->portget_nget = nget;
	pgetp->portget_pid = curproc->p_pid;
	if (portq->portq_thread == NULL) {
		/* first waiting thread: becomes a one-element circular list */
		portq->portq_thread = pgetp;
		portq->portq_nget = nget;
		pgetp->portget_prev = pgetp;
		pgetp->portget_next = pgetp;
		return (pgetp);
	}

	/*
	 * thread waiting for less events will be set on top of the queue.
	 * Walk the circular list until an entry with a larger-or-equal
	 * request is found or we are back at the head.
	 */
	ttp = portq->portq_thread;
	htp = ttp;
	for (;;) {
		if (nget <= ttp->portget_nget)
			break;
		if (htp == ttp->portget_next)
			break;	/* last event */
		ttp = ttp->portget_next;
	}

	/* add thread to the queue (insert before ttp) */
	pgetp->portget_next = ttp;
	pgetp->portget_prev = ttp->portget_prev;
	ttp->portget_prev->portget_next = pgetp;
	ttp->portget_prev = pgetp;
	if (portq->portq_thread == ttp)
		portq->portq_thread = pgetp;
	/* portq_nget caches the request size of the head waiter */
	portq->portq_nget = portq->portq_thread->portget_nget;
	return (pgetp);
}

/*
 * Take thread out of the queue.
 */
static void
port_dequeue_thread(port_queue_t *portq, portget_t *pgetp)
{
	if (pgetp->portget_next == pgetp) {
		/* last (single) waiting thread */
		portq->portq_thread = NULL;
	} else {
		/* unlink from the circular list, fix up the head if needed */
		pgetp->portget_prev->portget_next = pgetp->portget_next;
		pgetp->portget_next->portget_prev = pgetp->portget_prev;
		if (portq->portq_thread == pgetp)
			portq->portq_thread = pgetp->portget_next;
		portq->portq_nget = portq->portq_thread->portget_nget;
	}
	kmem_free(pgetp, sizeof (portget_t));
}

/*
 * Set up event port kstats.
1841 */ 1842 static void 1843 port_kstat_init() 1844 { 1845 kstat_t *ksp; 1846 uint_t ndata; 1847 1848 ndata = sizeof (port_kstat) / sizeof (kstat_named_t); 1849 ksp = kstat_create("portfs", 0, "Event Ports", "misc", 1850 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL); 1851 if (ksp) { 1852 ksp->ks_data = &port_kstat; 1853 kstat_install(ksp); 1854 } 1855 } 1856