1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/cred.h> 32 #include <sys/modctl.h> 33 #include <sys/vfs.h> 34 #include <sys/vfs_opreg.h> 35 #include <sys/sysmacros.h> 36 #include <sys/cmn_err.h> 37 #include <sys/stat.h> 38 #include <sys/errno.h> 39 #include <sys/kmem.h> 40 #include <sys/file.h> 41 #include <sys/kstat.h> 42 #include <sys/port_impl.h> 43 #include <sys/task.h> 44 #include <sys/project.h> 45 46 /* 47 * Event Ports can be shared across threads or across processes. 48 * Every thread/process can use an own event port or a group of them 49 * can use a single port. A major request was also to get the ability 50 * to submit user-defined events to a port. The idea of the 51 * user-defined events is to use the event ports for communication between 52 * threads/processes (like message queues). User defined-events are queued 53 * in a port with the same priority as other event types. 54 * 55 * Events are delivered only once. 
The thread/process which is waiting 56 * for events with the "highest priority" (priority here is related to the 57 * internal strategy to wakeup waiting threads) will retrieve the event, 58 * all other threads/processes will not be notified. There is also 59 * the requirement to have events which should be submitted immediately 60 * to all "waiting" threads. That is the main task of the alert event. 61 * The alert event is submitted by the application to a port. The port 62 * changes from a standard mode to the alert mode. Now all waiting threads 63 * will be awaken immediately and they will return with the alert event. 64 * Threads trying to retrieve events from a port in alert mode will 65 * return immediately with the alert event. 66 * 67 * 68 * An event port is like a kernel queue, which accept events submitted from 69 * user level as well as events submitted from kernel sub-systems. Sub-systems 70 * able to submit events to a port are the so-called "event sources". 71 * Current event sources: 72 * PORT_SOURCE_AIO : events submitted per transaction completion from 73 * POSIX-I/O framework. 74 * PORT_SOURCE_TIMER : events submitted when a timer fires 75 * (see timer_create(3RT)). 76 * PORT_SOURCE_FD : events submitted per file descriptor (see poll(2)). 77 * PORT_SOURCE_ALERT : events submitted from user. This is not really a 78 * single event, this is actually a port mode 79 * (see port_alert(3c)). 80 * PORT_SOURCE_USER : events submitted by applications with 81 * port_send(3c) or port_sendn(3c). 82 * 83 * There is a user API implemented in the libc library as well as a 84 * kernel API implemented in port_subr.c in genunix. 85 * The available user API functions are: 86 * port_create() : create a port as a file descriptor of portfs file system 87 * The standard close(2) function closes a port. 88 * port_associate() : associate a file descriptor with a port to be able to 89 * retrieve events from that file descriptor. 
90 * port_dissociate(): remove the association of a file descriptor with a port. 91 * port_alert() : set/unset a port in alert mode 92 * port_send() : send an event of type PORT_SOURCE_USER to a port 93 * port_sendn() : send an event of type PORT_SOURCE_USER to a list of ports 94 * port_get() : retrieve a single event from a port 95 * port_getn() : retrieve a list of events from a port 96 * 97 * The available kernel API functions are: 98 * port_allocate_event(): allocate an event slot/structure of/from a port 99 * port_init_event() : set event data in the event structure 100 * port_send_event() : send event to a port 101 * port_free_event() : deliver allocated slot/structure back to a port 102 * port_associate_ksource(): associate a kernel event source with a port 103 * port_dissociate_ksource(): dissociate a kernel event source from a port 104 * 105 * The libc implementation consists of small functions which pass the 106 * arguments to the kernel using the "portfs" system call. It means, all the 107 * synchronisation work is being done in the kernel. The "portfs" system 108 * call loads the portfs file system into the kernel. 109 * 110 * PORT CREATION 111 * The first function to be used is port_create() which internally creates 112 * a vnode and a portfs node. The portfs node is represented by the port_t 113 * structure, which again includes all the data necessary to control a port. 114 * port_create() returns a file descriptor, which needs to be used in almost 115 * all other event port functions. 116 * The maximum number of ports per system is controlled by the resource 117 * control: project:port-max-ids. 118 * 119 * EVENT GENERATION 120 * The second step is the triggering of events, which could be sent to a port. 121 * Every event source implements an own method to generate events for a port: 122 * PORT_SOURCE_AIO: 123 * The sigevent structure of the standard POSIX-IO functions 124 * was extended by an additional notification type. 
125 * Standard notification types: 126 * SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD 127 * Event ports introduced now SIGEV_PORT. 128 * The notification type SIGEV_PORT specifies that a structure 129 * of type port_notify_t has to be attached to the sigev_value. 130 * The port_notify_t structure contains the event port file 131 * descriptor and a user-defined pointer. 132 * Internally the AIO implementation will use the kernel API 133 * functions to allocate an event port slot per transaction (aiocb) 134 * and sent the event to the port as soon as the transaction completes. 135 * All the events submitted per transaction are of type 136 * PORT_SOURCE_AIO. 137 * PORT_SOURCE_TIMER: 138 * The timer_create() function uses the same method as the 139 * PORT_SOURCE_AIO event source. It also uses the sigevent structure 140 * to deliver the port information. 141 * Internally the timer code will allocate a single event slot/struct 142 * per timer and it will send the timer event as soon as the timer 143 * fires. If the timer-fired event is not delivered to the application 144 * before the next period elapsed, then an overrun counter will be 145 * incremented. The timer event source uses a callback function to 146 * detect the delivery of the event to the application. At that time 147 * the timer callback function will update the event overrun counter. 148 * PORT_SOURCE_FD: 149 * This event source uses the port_associate() function to allocate 150 * an event slot/struct from a port. The application defines in the 151 * events argument of port_associate() the type of events which it is 152 * interested on. 153 * The internal pollwakeup() function is used by all the file 154 * systems --which are supporting the VOP_POLL() interface- to notify 155 * the upper layer (poll(2), devpoll(7d) and now event ports) about 156 * the event triggered (see valid events in poll(2)). 157 * The pollwakeup() function forwards the event to the layer registered 158 * to receive the current event. 
159 * The port_dissociate() function can be used to free the allocated 160 * event slot from the port. Anyway, file descriptors deliver events 161 * only one time and remain deactivated until the application 162 * reactivates the association of a file descriptor with port_associate(). 163 * If an associated file descriptor is closed then the file descriptor 164 * will be dissociated automatically from the port. 165 * 166 * PORT_SOURCE_ALERT: 167 * This event type is generated when the port was previously set in 168 * alert mode using the port_alert() function. 169 * A single alert event is delivered to every thread which tries to 170 * retrieve events from a port. 171 * PORT_SOURCE_USER: 172 * This type of event is generated from user level using the port_send() 173 * function to send a user event to a port or the port_sendn() function 174 * to send an event to a list of ports. 175 * 176 * EVENT DELIVERY / RETRIEVING EVENTS 177 * Events remain in the port queue until: 178 * - the application uses port_get() or port_getn() to retrieve events, 179 * - the event source cancel the event, 180 * - the event port is closed or 181 * - the process exits. 182 * The maximal number of events in a port queue is the maximal number 183 * of event slots/structures which can be allocated by event sources. 184 * The allocation of event slots/structures is controlled by the resource 185 * control: process.port-max-events. 186 * The port_get() function retrieves a single event and the port_getn() 187 * function retrieves a list of events. 188 * Events are classified as shareable and non-shareable events across processes. 189 * Non-shareable events are invisible for the port_get(n)() functions of 190 * processes other than the owner of the event. 191 * Shareable event types are: 192 * PORT_SOURCE_USER events 193 * This type of event is unconditionally shareable and without 194 * limitations. 
If the parent process sends a user event and closes 195 * the port afterwards, the event remains in the port and the child 196 * process will still be able to retrieve the user event. 197 * PORT_SOURCE_ALERT events 198 * This type of event is shareable between processes. 199 * Limitation: The alert mode of the port is removed if the owner 200 * (process which set the port in alert mode) of the 201 * alert event closes the port. 202 * PORT_SOURCE_FD events 203 * This type of event is conditional shareable between processes. 204 * After fork(2) all forked file descriptors are shareable between 205 * the processes. The child process is allowed to retrieve events 206 * from the associated file descriptors and it can also re-associate 207 * the fd with the port. 208 * Limitations: The child process is not allowed to dissociate 209 * the file descriptor from the port. Only the 210 * owner (process) of the association is allowed to 211 * dissociate the file descriptor from the port. 212 * If the owner of the association closes the port 213 * the association will be removed. 214 * PORT_SOURCE_AIO events 215 * This type of event is not shareable between processes. 216 * PORT_SOURCE_TIMER events 217 * This type of event is not shareable between processes. 218 * 219 * FORK BEHAVIOUR 220 * On fork(2) the child process inherits all opened file descriptors from 221 * the parent process. This is also valid for port file descriptors. 222 * Associated file descriptors with a port maintain the association across the 223 * fork(2). It means, the child process gets full access to the port and 224 * it can retrieve events from all common associated file descriptors. 225 * Events of file descriptors created and associated with a port after the 226 * fork(2) are non-shareable and can only be retrieved by the same process. 
227 * 228 * If the parent or the child process closes an exported port (using fork(2) 229 * or I_SENDFD) all the file descriptors associated with the port by the 230 * process will be dissociated from the port. Events of dissociated file 231 * descriptors as well as all non-shareable events will be discarded. 232 * The other process can continue working with the port as usual. 233 * 234 * CLOSING A PORT 235 * close(2) has to be used to close a port. See FORK BEHAVIOUR for details. 236 * 237 * PORT EVENT STRUCTURES 238 * The global control structure of the event ports framework is port_control_t. 239 * port_control_t keeps track of the number of created ports in the system. 240 * The cache of the port event structures is also located in port_control_t. 241 * 242 * On port_create() the vnode and the portfs node is also created. 243 * The portfs node is represented by the port_t structure. 244 * The port_t structure manages all port specific tasks: 245 * - management of resource control values 246 * - port VOP_POLL interface 247 * - creation time 248 * - uid and gid of the port 249 * 250 * The port_t structure contains the port_queue_t structure. 251 * The port_queue_t structure contains all the data necessary for the 252 * queue management: 253 * - locking 254 * - condition variables 255 * - event counters 256 * - submitted events (represented by port_kevent_t structures) 257 * - threads waiting for event delivery (check portget_t structure) 258 * - PORT_SOURCE_FD cache (managed by the port_fdcache_t structure) 259 * - event source management (managed by the port_source_t structure) 260 * - alert mode management (check port_alert_t structure) 261 * 262 * EVENT MANAGEMENT 263 * The event port file system creates a kmem_cache for internal allocation of 264 * event port structures. 265 * 266 * 1. 
Event source association with a port:
 *	The first step to do for event sources is to get associated with a port
 *	using the port_associate_ksource() function or adding an entry to the
 *	port_ksource_tab[]. An event source can get dissociated from a port
 *	using the port_dissociate_ksource() function. An entry in the
 *	port_ksource_tab[] implies that the source will be associated
 *	automatically with every newly created port.
 *	The event source can deliver a callback function, which is used by the
 *	port to notify the event source about close(2). The idea is that
 *	in such a case the event source should free all allocated resources
 *	and it must return to the port all allocated slots/structures.
 *	The port_close() function will wait until all allocated event
 *	structures/slots are returned to the port.
 *	The callback function is not necessary when the event source does not
 *	maintain local resources; a second condition is that the event source
 *	can guarantee that allocated event slots will be returned without
 *	delay to the port (it will not block and sleep somewhere).
 *
 * 2. Reservation of an event slot / event structure
 *	The event port reliability is based on the reservation of an event
 *	"slot" (allocation of an event structure) by the event source as part
 *	of the application call. If the maximal number of event slots is
 *	exhausted then the event source can return a corresponding error code
 *	to the application.
 *
 *	The port_alloc_event() function has to be used by event sources to
 *	allocate an event slot (reserve an event structure). The
 *	port_alloc_event() function does not block and it will return a 0
 *	value on success or an error code
 *	if it fails.
294 * An argument of port_alloc_event() is a flag which determines the behavior 295 * of the event after it was delivered to the application: 296 * PORT_ALLOC_DEFAULT : event slot becomes free after delivery to the 297 * application. 298 * PORT_ALLOC_PRIVATE : event slot remains under the control of the event 299 * source. This kind of slots can not be used for 300 * event delivery and should only be used internally 301 * by the event source. 302 * PORT_KEV_CACHED : event slot remains under the control of an event 303 * port cache. It does not become free after delivery 304 * to the application. 305 * PORT_ALLOC_SCACHED : event slot remains under the control of the event 306 * source. The event source takes the control over 307 * the slot after the event is delivered to the 308 * application. 309 * 310 * 3. Delivery of events to the event port 311 * Earlier allocated event structure/slot has to be used to deliver 312 * event data to the port. Event source has to use the function 313 * port_send_event(). The single argument is a pointer to the previously 314 * reserved event structure/slot. 315 * The portkev_events field of the port_kevent_t structure can be updated/set 316 * in two ways: 317 * 1. using the port_set_event() function, or 318 * 2. updating the portkev_events field out of the callback function: 319 * The event source can deliver a callback function to the port as an 320 * argument of port_init_event(). 321 * One of the arguments of the callback function is a pointer to the 322 * events field, which will be delivered to the application. 323 * (see Delivery of events to the application). 324 * Event structures/slots can be delivered to the event port only one time, 325 * they remain blocked until the data is delivered to the application and the 326 * slot becomes free or it is delivered back to the event source 327 * (PORT_ALLOC_SCACHED). 
The activation of the callback function mentioned above 328 * is at the same time the indicator for the event source that the event 329 * structure/slot is free for reuse. 330 * 331 * 4. Delivery of events to the application 332 * The events structures/slots delivered by event sources remain in the 333 * port queue until they are retrieved by the application or the port 334 * is closed (exit(2) also closes all opened file descriptors).. 335 * The application uses port_get() or port_getn() to retrieve events from 336 * a port. port_get() retrieves a single event structure/slot and port_getn() 337 * retrieves a list of event structures/slots. 338 * Both functions are able to poll for events and return immediately or they 339 * can specify a timeout value. 340 * Before the events are delivered to the application they are moved to a 341 * second temporary internal queue. The idea is to avoid lock collisions or 342 * contentions of the global queue lock. 343 * The global queue lock is used every time when an event source delivers 344 * new events to the port. 345 * The port_get() and port_getn() functions 346 * a) retrieve single events from the temporary queue, 347 * b) prepare the data to be passed to the application memory, 348 * c) activate the callback function of the event sources: 349 * - to get the latest event data, 350 * - the event source can free all allocated resources associated with the 351 * current event, 352 * - the event source can re-use the current event slot/structure 353 * - the event source can deny the delivery of the event to the application 354 * (e.g. because of the wrong process). 355 * d) put the event back to the temporary queue if the event delivery was denied 356 * e) repeat a) until d) as long as there are events in the queue and 357 * there is enough user space available. 
358 * 359 * The loop described above could block for a very long time the global mutex, 360 * to avoid that a second mutex was introduced to synchronized concurrent 361 * threads accessing the temporary queue. 362 */ 363 364 static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, 365 uintptr_t); 366 367 static struct sysent port_sysent = { 368 6, 369 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 370 (int (*)())portfs, 371 }; 372 373 static struct modlsys modlsys = { 374 &mod_syscallops, "event ports", &port_sysent 375 }; 376 377 #ifdef _SYSCALL32_IMPL 378 379 static int64_t 380 portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4, 381 uint32_t arg5, uint32_t arg6); 382 383 static struct sysent port_sysent32 = { 384 6, 385 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 386 (int (*)())portfs32, 387 }; 388 389 static struct modlsys modlsys32 = { 390 &mod_syscallops32, 391 "32-bit event ports syscalls", 392 &port_sysent32 393 }; 394 #endif /* _SYSCALL32_IMPL */ 395 396 static struct modlinkage modlinkage = { 397 MODREV_1, 398 &modlsys, 399 #ifdef _SYSCALL32_IMPL 400 &modlsys32, 401 #endif 402 NULL 403 }; 404 405 port_kstat_t port_kstat = { 406 { "ports", KSTAT_DATA_UINT32 } 407 }; 408 409 dev_t portdev; 410 struct vnodeops *port_vnodeops; 411 struct vfs port_vfs; 412 413 extern rctl_hndl_t rc_process_portev; 414 extern rctl_hndl_t rc_project_portids; 415 extern void aio_close_port(void *, int, pid_t, int); 416 417 /* 418 * This table contains a list of event sources which need a static 419 * association with a port (every port). 420 * The last NULL entry in the table is required to detect "end of table". 
421 */ 422 struct port_ksource port_ksource_tab[] = { 423 {PORT_SOURCE_AIO, aio_close_port, NULL, NULL}, 424 {0, NULL, NULL, NULL} 425 }; 426 427 /* local functions */ 428 static int port_getn(port_t *, port_event_t *, uint_t, uint_t *, 429 port_gettimer_t *); 430 static int port_sendn(int [], int [], uint_t, int, void *, uint_t *); 431 static int port_alert(port_t *, int, int, void *); 432 static int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *); 433 static int port_send(port_t *, int, int, void *); 434 static int port_create(int *); 435 static int port_get_alert(port_alert_t *, port_event_t *); 436 static int port_copy_event(port_event_t *, port_kevent_t *, list_t *); 437 static int *port_errorn(int *, int, int, int); 438 static int port_noshare(void *, int *, pid_t, int, void *); 439 static int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *, 440 int); 441 static void port_init(port_t *); 442 static void port_remove_alert(port_queue_t *); 443 static void port_add_ksource_local(port_t *, port_ksource_t *); 444 static void port_check_return_cond(port_queue_t *); 445 static void port_dequeue_thread(port_queue_t *, portget_t *); 446 static portget_t *port_queue_thread(port_queue_t *, uint_t); 447 static void port_kstat_init(void); 448 449 #ifdef _SYSCALL32_IMPL 450 static int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *); 451 #endif 452 453 int 454 _init(void) 455 { 456 static const fs_operation_def_t port_vfsops_template[] = { 457 NULL, NULL 458 }; 459 extern const fs_operation_def_t port_vnodeops_template[]; 460 vfsops_t *port_vfsops; 461 int error; 462 major_t major; 463 464 if ((major = getudev()) == (major_t)-1) 465 return (ENXIO); 466 portdev = makedevice(major, 0); 467 468 /* Create a dummy vfs */ 469 error = vfs_makefsops(port_vfsops_template, &port_vfsops); 470 if (error) { 471 cmn_err(CE_WARN, "port init: bad vfs ops"); 472 return (error); 473 } 474 vfs_setops(&port_vfs, port_vfsops); 475 
port_vfs.vfs_flag = VFS_RDONLY; 476 port_vfs.vfs_dev = portdev; 477 vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0); 478 479 error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops); 480 if (error) { 481 vfs_freevfsops(port_vfsops); 482 cmn_err(CE_WARN, "port init: bad vnode ops"); 483 return (error); 484 } 485 486 mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL); 487 port_control.pc_nents = 0; /* number of active ports */ 488 489 /* create kmem_cache for port event structures */ 490 port_control.pc_cache = kmem_cache_create("port_cache", 491 sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 492 493 port_kstat_init(); /* init port kstats */ 494 return (mod_install(&modlinkage)); 495 } 496 497 int 498 _info(struct modinfo *modinfop) 499 { 500 return (mod_info(&modlinkage, modinfop)); 501 } 502 503 /* 504 * System call wrapper for all port related system calls from 32-bit programs. 505 */ 506 #ifdef _SYSCALL32_IMPL 507 static int64_t 508 portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3, 509 uint32_t a4) 510 { 511 int64_t error; 512 513 switch (opcode & PORT_CODE_MASK) { 514 case PORT_GET: 515 error = portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4); 516 break; 517 case PORT_SENDN: 518 error = portfs(opcode, (uint32_t)a0, a1, a2, a3, a4); 519 break; 520 default: 521 error = portfs(opcode, a0, a1, a2, a3, a4); 522 break; 523 } 524 return (error); 525 } 526 #endif /* _SYSCALL32_IMPL */ 527 528 /* 529 * System entry point for port functions. 530 * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE). 531 * The libc uses PORT_SYS_NOPORT in functions which do not deliver a 532 * port file descriptor as first argument. 
533 */ 534 static int64_t 535 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, 536 uintptr_t a4) 537 { 538 rval_t r; 539 port_t *pp; 540 int error = 0; 541 uint_t nget; 542 file_t *fp; 543 port_gettimer_t port_timer; 544 545 r.r_vals = 0; 546 if (opcode & PORT_SYS_NOPORT) { 547 opcode &= PORT_CODE_MASK; 548 if (opcode == PORT_SENDN) { 549 error = port_sendn((int *)a0, (int *)a1, (uint_t)a2, 550 (int)a3, (void *)a4, (uint_t *)&r.r_val1); 551 if (error && (error != EIO)) 552 return ((int64_t)set_errno(error)); 553 return (r.r_vals); 554 } 555 556 if (opcode == PORT_CREATE) { 557 error = port_create(&r.r_val1); 558 if (error) 559 return ((int64_t)set_errno(error)); 560 return (r.r_vals); 561 } 562 } 563 564 /* opcodes using port as first argument (a0) */ 565 566 if ((fp = getf((int)a0)) == NULL) 567 return ((uintptr_t)set_errno(EBADF)); 568 569 if (fp->f_vnode->v_type != VPORT) { 570 releasef((int)a0); 571 return ((uintptr_t)set_errno(EBADFD)); 572 } 573 574 pp = VTOEP(fp->f_vnode); 575 576 switch (opcode & PORT_CODE_MASK) { 577 case PORT_GET: 578 { 579 /* see PORT_GETN description */ 580 struct timespec timeout; 581 582 port_timer.pgt_flags = PORTGET_ONE; 583 port_timer.pgt_loop = 0; 584 port_timer.pgt_rqtp = NULL; 585 if (a4 != NULL) { 586 port_timer.pgt_timeout = &timeout; 587 timeout.tv_sec = (time_t)a2; 588 timeout.tv_nsec = (long)a3; 589 } else { 590 port_timer.pgt_timeout = NULL; 591 } 592 do { 593 nget = 1; 594 error = port_getn(pp, (port_event_t *)a1, 1, 595 (uint_t *)&nget, &port_timer); 596 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 597 break; 598 } 599 case PORT_GETN: 600 { 601 /* 602 * port_getn() can only retrieve own or shareable events from 603 * other processes. The port_getn() function remains in the 604 * kernel until own or shareable events are available or the 605 * timeout elapses. 
606 */ 607 port_timer.pgt_flags = 0; 608 port_timer.pgt_loop = 0; 609 port_timer.pgt_rqtp = NULL; 610 port_timer.pgt_timeout = (struct timespec *)a4; 611 do { 612 nget = a3; 613 error = port_getn(pp, (port_event_t *)a1, (uint_t)a2, 614 (uint_t *)&nget, &port_timer); 615 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 616 r.r_val1 = nget; 617 r.r_val2 = error; 618 releasef((int)a0); 619 if (error && error != ETIME) 620 return ((int64_t)set_errno(error)); 621 return (r.r_vals); 622 } 623 case PORT_ASSOCIATE: 624 { 625 /* currently only PORT_SOURCE_FD is implemented */ 626 if ((int)a1 != PORT_SOURCE_FD) { 627 error = EINVAL; 628 break; 629 } 630 error = port_associate_fd(pp, (int)a1, (uintptr_t)a2, (int)a3, 631 (void *)a4); 632 break; 633 } 634 case PORT_SEND: 635 { 636 /* user-defined events */ 637 error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2); 638 break; 639 } 640 case PORT_DISPATCH: 641 { 642 /* 643 * library events, blocking 644 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ 645 * are currently allowed. 646 */ 647 if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) { 648 error = EINVAL; 649 break; 650 } 651 error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2, 652 (uintptr_t)a3, (void *)a4); 653 break; 654 } 655 case PORT_DISSOCIATE: 656 { 657 /* currently only PORT_SOURCE_FD is implemented */ 658 if ((int)a1 != PORT_SOURCE_FD) { 659 error = EINVAL; 660 break; 661 } 662 error = port_dissociate_fd(pp, (uintptr_t)a2); 663 break; 664 } 665 case PORT_ALERT: 666 { 667 if ((int)a2) /* a2 = events */ 668 error = port_alert(pp, (int)a1, (int)a2, (void *)a3); 669 else 670 port_remove_alert(&pp->port_queue); 671 break; 672 } 673 default: 674 error = EINVAL; 675 break; 676 } 677 678 releasef((int)a0); 679 if (error) 680 return ((int64_t)set_errno(error)); 681 return (r.r_vals); 682 } 683 684 /* 685 * System call to create a port. 686 * 687 * The port_create() function creates a vnode of type VPORT per port. 
688 * The port control data is associated with the vnode as vnode private data. 689 * The port_create() function returns an event port file descriptor. 690 */ 691 static int 692 port_create(int *fdp) 693 { 694 port_t *pp; 695 vnode_t *vp; 696 struct file *fp; 697 proc_t *p = curproc; 698 699 /* initialize vnode and port private data */ 700 pp = kmem_zalloc(sizeof (port_t), KM_SLEEP); 701 702 pp->port_vnode = vn_alloc(KM_SLEEP); 703 vp = EPTOV(pp); 704 vn_setops(vp, port_vnodeops); 705 vp->v_type = VPORT; 706 vp->v_vfsp = &port_vfs; 707 vp->v_data = (caddr_t)pp; 708 709 mutex_enter(&port_control.pc_mutex); 710 /* 711 * Retrieve the maximal number of event ports allowed per system from 712 * the resource control: project.port-max-ids. 713 */ 714 mutex_enter(&p->p_lock); 715 if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p, 716 port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) { 717 mutex_exit(&p->p_lock); 718 vn_free(vp); 719 kmem_free(pp, sizeof (port_t)); 720 mutex_exit(&port_control.pc_mutex); 721 return (EAGAIN); 722 } 723 724 /* 725 * Retrieve the maximal number of events allowed per port from 726 * the resource control: process.port-max-events. 727 */ 728 pp->port_max_events = rctl_enforced_value(rc_process_portev, 729 p->p_rctls, p); 730 mutex_exit(&p->p_lock); 731 732 /* allocate a new user file descriptor and a file structure */ 733 if (falloc(vp, 0, &fp, fdp)) { 734 /* 735 * If the file table is full, free allocated resources. 
736 */ 737 vn_free(vp); 738 kmem_free(pp, sizeof (port_t)); 739 mutex_exit(&port_control.pc_mutex); 740 return (EMFILE); 741 } 742 743 mutex_exit(&fp->f_tlock); 744 745 pp->port_fd = *fdp; 746 port_control.pc_nents++; 747 p->p_portcnt++; 748 port_kstat.pks_ports.value.ui32++; 749 mutex_exit(&port_control.pc_mutex); 750 751 /* initializes port private data */ 752 port_init(pp); 753 /* set user file pointer */ 754 setf(*fdp, fp); 755 return (0); 756 } 757 758 /* 759 * port_init() initializes event port specific data 760 */ 761 static void 762 port_init(port_t *pp) 763 { 764 port_queue_t *portq; 765 port_ksource_t *pks; 766 767 mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL); 768 portq = &pp->port_queue; 769 mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL); 770 pp->port_flags |= PORT_INIT; 771 772 /* 773 * If it is not enough memory available to satisfy a user 774 * request using a single port_getn() call then port_getn() 775 * will reduce the size of the list to PORT_MAX_LIST. 776 */ 777 pp->port_max_list = port_max_list; 778 779 /* Set timestamp entries required for fstat(2) requests */ 780 gethrestime(&pp->port_ctime); 781 pp->port_uid = crgetuid(curproc->p_cred); 782 pp->port_gid = crgetgid(curproc->p_cred); 783 784 /* initialize port queue structs */ 785 list_create(&portq->portq_list, sizeof (port_kevent_t), 786 offsetof(port_kevent_t, portkev_node)); 787 list_create(&portq->portq_get_list, sizeof (port_kevent_t), 788 offsetof(port_kevent_t, portkev_node)); 789 portq->portq_flags = 0; 790 pp->port_pid = curproc->p_pid; 791 792 /* Allocate cache skeleton for PORT_SOURCE_FD events */ 793 portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP); 794 mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL); 795 796 /* 797 * Allocate cache skeleton for association of event sources. 
798 */ 799 mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL); 800 portq->portq_scache = kmem_zalloc( 801 PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP); 802 803 /* 804 * pre-associate some kernel sources with this port. 805 * The pre-association is required to create port_source_t 806 * structures for object association. 807 * Some sources can not get associated with a port before the first 808 * object association is requested. Another reason to pre_associate 809 * a particular source with a port is because of performance. 810 */ 811 812 for (pks = port_ksource_tab; pks->pks_source != 0; pks++) 813 port_add_ksource_local(pp, pks); 814 } 815 816 /* 817 * The port_add_ksource_local() function is being used to associate 818 * event sources with every new port. 819 * The event sources need to be added to port_ksource_tab[]. 820 */ 821 static void 822 port_add_ksource_local(port_t *pp, port_ksource_t *pks) 823 { 824 port_source_t *pse; 825 port_source_t **ps; 826 827 mutex_enter(&pp->port_queue.portq_source_mutex); 828 ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)]; 829 for (pse = *ps; pse != NULL; pse = pse->portsrc_next) { 830 if (pse->portsrc_source == pks->pks_source) 831 break; 832 } 833 834 if (pse == NULL) { 835 /* associate new source with the port */ 836 pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP); 837 pse->portsrc_source = pks->pks_source; 838 pse->portsrc_close = pks->pks_close; 839 pse->portsrc_closearg = pks->pks_closearg; 840 pse->portsrc_cnt = 1; 841 842 pks->pks_portsrc = pse; 843 if (*ps != NULL) 844 pse->portsrc_next = (*ps)->portsrc_next; 845 *ps = pse; 846 } 847 mutex_exit(&pp->port_queue.portq_source_mutex); 848 } 849 850 /* 851 * The port_send() function sends an event of type "source" to a 852 * port. This function is non-blocking. An event can be sent to 853 * a port as long as the number of events per port does not achieve the 854 * maximal allowed number of events. The max. 
number of events per port is 855 * defined by the resource control process.max-port-events. 856 * This function is used by the port library function port_send() 857 * and port_dispatch(). The port_send(3c) function is part of the 858 * event ports API and submits events of type PORT_SOURCE_USER. The 859 * port_dispatch() function is project private and it is used by library 860 * functions to submit events of other types than PORT_SOURCE_USER 861 * (e.g. PORT_SOURCE_AIO). 862 */ 863 static int 864 port_send(port_t *pp, int source, int events, void *user) 865 { 866 port_kevent_t *pev; 867 int error; 868 869 error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev); 870 if (error) 871 return (error); 872 873 pev->portkev_object = 0; 874 pev->portkev_events = events; 875 pev->portkev_user = user; 876 pev->portkev_callback = NULL; 877 pev->portkev_arg = NULL; 878 pev->portkev_flags = 0; 879 880 port_send_event(pev); 881 return (0); 882 } 883 884 /* 885 * The port_noshare() function returns 0 if the current event was generated 886 * by the same process. Otherwise is returns a value other than 0 and the 887 * event should not be delivered to the current processe. 888 * The port_noshare() function is normally used by the port_dispatch() 889 * function. The port_dispatch() function is project private and can only be 890 * used within the event port project. 891 * Currently the libaio uses the port_dispatch() function to deliver events 892 * of types PORT_SOURCE_AIO. 893 */ 894 /* ARGSUSED */ 895 static int 896 port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp) 897 { 898 if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid) 899 return (1); 900 return (0); 901 } 902 903 /* 904 * The port_dispatch_event() function is project private and it is used by 905 * libraries involved in the project to deliver events to the port. 906 * port_dispatch will sleep and wait for enough resources to satisfy the 907 * request, if necessary. 
 * The library can specify if the delivered event is shareable with other
 * processes (see PORT_SYS_NOSHARE flag).
 */
static int
port_dispatch_event(port_t *pp, int opcode, int source, int events,
    uintptr_t object, void *user)
{
	port_kevent_t	*pev;
	int		error;

	/* blocking allocation: may sleep until event resources are free */
	error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev);
	if (error)
		return (error);

	pev->portkev_object = object;
	pev->portkev_events = events;
	pev->portkev_user = user;
	pev->portkev_arg = NULL;
	if (opcode & PORT_SYS_NOSHARE) {
		/* restrict delivery to the process which generated the event */
		pev->portkev_flags = PORT_KEV_NOSHARE;
		pev->portkev_callback = port_noshare;
	} else {
		pev->portkev_flags = 0;
		pev->portkev_callback = NULL;
	}

	port_send_event(pev);
	return (0);
}


/*
 * The port_sendn() function is the kernel implementation of the event
 * port API function port_sendn(3c).
 * This function is able to send an event to a list of event ports.
 * On return *nget holds the number of ports the event was successfully
 * delivered to; per-port errors are reported through the errors[] array.
 */
static int
port_sendn(int ports[], int errors[], uint_t nent, int events, void *user,
    uint_t *nget)
{
	port_kevent_t	*pev;
	int		errorcnt = 0;
	int		error = 0;
	int		count;
	int		port;
	int		*plist;
	int		*elist = NULL;
	file_t		*fp;
	port_t		*pp;

	if (nent == 0 || nent > port_max_list)
		return (EINVAL);

	/* copy the user's fd list into a kernel buffer */
	plist = kmem_alloc(nent * sizeof (int), KM_SLEEP);
	if (copyin((void *)ports, plist, nent * sizeof (int))) {
		kmem_free(plist, nent * sizeof (int));
		return (EFAULT);
	}

	/*
	 * Scan the list for event port file descriptors and send the
	 * attached user event data embedded in a event of type
	 * PORT_SOURCE_USER to every event port in the list.
	 * If a list entry is not a valid event port then the corresponding
	 * error code will be stored in the errors[] list with the same
	 * list offset as in the ports[] list.
	 */
	for (count = 0; count < nent; count++) {
		port = plist[count];
		if ((fp = getf(port)) == NULL) {
			elist = port_errorn(elist, nent, EBADF, count);
			errorcnt++;
			continue;
		}

		pp = VTOEP(fp->f_vnode);
		if (fp->f_vnode->v_type != VPORT) {
			/* not an event port fd */
			releasef(port);
			elist = port_errorn(elist, nent, EBADFD, count);
			errorcnt++;
			continue;
		}

		error = port_alloc_event_local(pp, PORT_SOURCE_USER,
		    PORT_ALLOC_DEFAULT, &pev);
		if (error) {
			releasef(port);
			elist = port_errorn(elist, nent, error, count);
			errorcnt++;
			continue;
		}

		pev->portkev_object = 0;
		pev->portkev_events = events;
		pev->portkev_user = user;
		pev->portkev_callback = NULL;
		pev->portkev_arg = NULL;
		pev->portkev_flags = 0;

		port_send_event(pev);
		releasef(port);
	}
	if (errorcnt) {
		/* at least one port failed: report per-port codes to user */
		error = EIO;
		if (copyout(elist, (void *)errors, nent * sizeof (int)))
			error = EFAULT;
		kmem_free(elist, nent * sizeof (int));
	}
	*nget = nent - errorcnt;
	kmem_free(plist, nent * sizeof (int));
	return (error);
}

/*
 * Record an error code at the given index of the error list, allocating
 * the (zero-filled) list lazily on first error. Caller frees the list.
 */
static int *
port_errorn(int *elist, int nent, int error, int index)
{
	if (elist == NULL)
		elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP);
	elist[index] = error;
	return (elist);
}

/*
 * port_alert()
 * The port_alert() function is a high priority event and it is always set
 * on top of the queue. It is also delivered as a single event.
 * flags:
 *	- SET :overwrite current alert data
 *	- UPDATE:set alert data or return EBUSY if alert mode is already set
 *
 * - set the ALERT flag
 * - wakeup all sleeping threads
 */
static int
port_alert(port_t *pp, int flags, int events, void *user)
{
	port_queue_t	*portq;
	portget_t	*pgetp;
	port_alert_t	*pa;

	if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID)
		return (EINVAL);

	portq = &pp->port_queue;
	pa = &portq->portq_alert;
	mutex_enter(&portq->portq_mutex);

	/* check alert conditions: UPDATE fails if already in alert mode */
	if (flags == PORT_ALERT_UPDATE) {
		if (portq->portq_flags & PORTQ_ALERT) {
			mutex_exit(&portq->portq_mutex);
			return (EBUSY);
		}
	}

	/*
	 * Store alert data in the port to be delivered to threads
	 * which are using port_get(n) to retrieve events.
	 */
	portq->portq_flags |= PORTQ_ALERT;
	pa->portal_events = events;		/* alert info */
	pa->portal_pid = curproc->p_pid;	/* process owner */
	pa->portal_object = 0;			/* no object */
	pa->portal_user = user;			/* user alert data */

	/* alert and deliver alert data to waiting threads */
	pgetp = portq->portq_thread;
	if (pgetp == NULL) {
		/* no threads waiting for events */
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	/*
	 * Set waiting threads in alert mode (PORTGET_ALERT)..
	 * Every thread waiting for events already allocated a portget_t
	 * structure to sleep on.
	 * The port alert arguments are stored in the portget_t structure.
	 * The PORTGET_ALERT flag is set to indicate the thread to return
	 * immediately with the alert event.
	 * (portq_thread heads a circular list, so the do/while visits
	 * every waiter exactly once.)
	 */
	do {
		if ((pgetp->portget_state & PORTGET_ALERT) == 0) {
			pa = &pgetp->portget_alert;
			pa->portal_events = events;
			pa->portal_object = 0;
			pa->portal_user = user;
			pgetp->portget_state |= PORTGET_ALERT;
			cv_signal(&pgetp->portget_cv);
		}
	} while ((pgetp = pgetp->portget_next) != portq->portq_thread);
	mutex_exit(&portq->portq_mutex);
	return (0);
}

/*
 * Clear alert state of the port
 */
static void
port_remove_alert(port_queue_t *portq)
{
	mutex_enter(&portq->portq_mutex);
	portq->portq_flags &= ~PORTQ_ALERT;
	mutex_exit(&portq->portq_mutex);
}

/*
 * The port_getn() function is used to retrieve events from a port.
 *
 * The port_getn() function returns immediately if there are enough events
 * available in the port to satisfy the request or if the port is in alert
 * mode (see port_alert(3c)).
 * The timeout argument of port_getn(3c) -which is embedded in the
 * port_gettimer_t structure- specifies if the system call should block or if it
 * should return immediately depending on the number of events available.
 * This function is internally used by port_getn(3c) as well as by
 * port_get(3c).
 */
static int
port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget,
    port_gettimer_t *pgt)
{
	port_queue_t	*portq;
	port_kevent_t	*pev;
	port_kevent_t	*lev;
	int		error = 0;
	uint_t		nmax;
	uint_t		nevents;
	uint_t		eventsz;
	port_event_t	*kevp;
	list_t		*glist;
	uint_t		tnent;
	int		rval;
	int		blocking = -1;	/* -1: timeout not evaluated yet */
	int		timecheck;
	int		flag;
	timespec_t	rqtime;
	timespec_t	*rqtp = NULL;
	portget_t	*pgetp;
	void		*results;
	model_t		model = get_udatamodel();

	flag = pgt->pgt_flags;

	/* can not require more events than the list can hold */
	if (*nget > max && max > 0)
		return (EINVAL);

	portq = &pp->port_queue;
	mutex_enter(&portq->portq_mutex);
	if (max == 0) {
		/*
		 * Return number of objects with events.
		 * The port_block() call is required to synchronize this
		 * thread with another possible thread, which could be
		 * retrieving events from the port queue.
		 */
		port_block(portq);
		/*
		 * Check if a second thread is currently retrieving events
		 * and it is using the temporary event queue.
		 */
		if (portq->portq_tnent) {
			/* put remaining events back to the port queue */
			port_push_eventq(portq);
		}
		*nget = portq->portq_nent;
		port_unblock(portq);
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	if (uevp == NULL) {
		mutex_exit(&portq->portq_mutex);
		return (EFAULT);
	}
	if (*nget == 0) {		/* no events required */
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	/* port is being closed ... */
	if (portq->portq_flags & PORTQ_CLOSE) {
		mutex_exit(&portq->portq_mutex);
		return (EBADFD);
	}

	/* return immediately if port in alert mode */
	if (portq->portq_flags & PORTQ_ALERT) {
		error = port_get_alert(&portq->portq_alert, uevp);
		if (error == 0)
			*nget = 1;
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

	portq->portq_thrcnt++;

	/*
	 * Now check if the completed events satisfy the
	 * "wait" requirements of the current thread:
	 */
	if (pgt->pgt_loop) {
		/*
		 * loop entry of same thread
		 * pgt_loop is set when the current thread returns
		 * prematurely from this function. That could happen
		 * when a port is being shared between processes and
		 * this thread could not find events to return.
		 * It is not allowed to a thread to retrieve non-shareable
		 * events generated in other processes.
		 * PORTQ_WAIT_EVENTS is set when a thread already
		 * checked the current event queue and no new events
		 * are added to the queue.
		 */
		if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) &&
		    (portq->portq_nent >= *nget)) {
			/* some new events arrived ...check them */
			goto portnowait;
		}
		/* reuse timeout state computed on the previous pass */
		rqtp = pgt->pgt_rqtp;
		timecheck = pgt->pgt_timecheck;
		pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
	} else {
		/* check if enough events are available ... */
		if (portq->portq_nent >= *nget)
			goto portnowait;
		/*
		 * There are not enough events available to satisfy
		 * the request, check timeout value and wait for
		 * incoming events.
		 */
		error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp,
		    &blocking, flag);
		if (error) {
			port_check_return_cond(portq);
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		if (blocking == 0)	/* don't block, check fired events */
			goto portnowait;

		if (rqtp != NULL) {
			/* convert the relative timeout to an absolute time */
			timespec_t	now;
			timecheck = timechanged;
			gethrestime(&now);
			timespecadd(rqtp, &now);
		}
		/*
		 * NOTE(review): on this path timecheck stays uninitialized
		 * when rqtp == NULL (infinite wait); cv_waituntil_sig()
		 * presumably ignores it in that case — confirm.
		 */
	}

	/* enqueue thread in the list of waiting threads */
	pgetp = port_queue_thread(portq, *nget);


	/* Wait here until return conditions met */
	for (;;) {
		if (pgetp->portget_state & PORTGET_ALERT) {
			/* reap alert event and return */
			error = port_get_alert(&pgetp->portget_alert, uevp);
			if (error)
				*nget = 0;
			else
				*nget = 1;
			port_dequeue_thread(&pp->port_queue, pgetp);
			portq->portq_thrcnt--;
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		/*
		 * Check if some other thread is already retrieving
		 * events (portq_getn > 0).
		 */
		if ((portq->portq_getn == 0) &&
		    ((portq)->portq_nent >= *nget) &&
		    (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) ||
		    !((portq)->portq_flags & PORTQ_WAIT_EVENTS)))
			break;

		if (portq->portq_flags & PORTQ_CLOSE) {
			error = EBADFD;
			break;
		}

		rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
		    rqtp, timecheck);

		if (rval <= 0) {
			/* 0 => signal received, -1 => timeout expired */
			error = (rval == 0) ? EINTR : ETIME;
			break;
		}
	}

	/* take thread out of the wait queue */
	port_dequeue_thread(portq, pgetp);

	if (error != 0 && (error == EINTR || error == EBADFD ||
	    (error == ETIME && flag))) {
		/* return without events */
		port_check_return_cond(portq);
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

portnowait:
	/*
	 * Move port event queue to a temporary event queue .
	 * New incoming events will be continue be posted to the event queue
	 * and they will not be considered by the current thread.
	 * The idea is to avoid lock contentions or an often locking/unlocking
	 * of the port queue mutex. The contention and performance degradation
	 * could happen because:
	 * a) incoming events use the port queue mutex to enqueue new events and
	 * b) before the event can be delivered to the application it is
	 *    necessary to notify the event sources about the event delivery.
	 *    Sometimes the event sources can require a long time to return and
	 *    the queue mutex would block incoming events.
	 * During this time incoming events (port_send_event()) do not need
	 * to awake threads waiting for events. Before the current thread
	 * returns it will check the conditions to awake other waiting threads.
	 */
	portq->portq_getn++;	/* number of threads retrieving events */
	port_block(portq);	/* block other threads here */
	nmax = max < portq->portq_nent ? max : portq->portq_nent;

	if (portq->portq_tnent) {
		/*
		 * Move remaining events from previous thread back to the
		 * port event queue.
		 */
		port_push_eventq(portq);
	}
	/* move port event queue to a temporary queue */
	list_move_tail(&portq->portq_get_list, &portq->portq_list);
	glist = &portq->portq_get_list;	/* use temporary event queue */
	tnent = portq->portq_nent;	/* get current number of events */
	portq->portq_nent = 0;		/* no events in the port event queue */
	portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */
	mutex_exit(&portq->portq_mutex); /* event queue can be reused now */

	if (model == DATAMODEL_NATIVE) {
		eventsz = sizeof (port_event_t);
		/* try non-sleeping allocation first; shrink request on OOM */
		kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
		if (kevp == NULL) {
			if (nmax > pp->port_max_list)
				nmax = pp->port_max_list;
			kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
		}
		results = kevp;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event(&kevp[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;	/* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#ifdef	_SYSCALL32_IMPL
	} else {
		/* 32-bit user process: same loop with port_event32_t */
		port_event32_t	*kevp32;

		eventsz = sizeof (port_event32_t);
		kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
		if (kevp32 == NULL) {
			if (nmax > pp->port_max_list)
				nmax = pp->port_max_list;
			kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
		}
		results = kevp32;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event32(&kevp32[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;	/* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#endif	/* _SYSCALL32_IMPL */
	}

	/*
	 * Remember number of remaining events in the temporary event queue.
	 */
	portq->portq_tnent = tnent - nevents;

	/*
	 * Work to do before return :
	 * - push list of remaining events back to the top of the standard
	 *   port queue.
	 * - if this is the last thread calling port_get(n) then wakeup the
	 *   thread waiting on close(2).
	 * - check for a deferred cv_signal from port_send_event() and wakeup
	 *   the sleeping thread.
	 */
	mutex_enter(&portq->portq_mutex);
	port_unblock(portq);
	if (portq->portq_tnent) {
		/*
		 * move remaining events in the temporary event queue back
		 * to the port event queue
		 */
		port_push_eventq(portq);
	}
	portq->portq_getn--;	/* update # of threads retrieving events */
	if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */
		/* Last thread => check close(2) conditions ... */
		if (portq->portq_flags & PORTQ_CLOSE) {
			cv_signal(&portq->portq_closecv);
			mutex_exit(&portq->portq_mutex);
			kmem_free(results, eventsz * nmax);
			/* do not copyout events */
			*nget = 0;
			return (EBADFD);
		}
	} else if (portq->portq_getn == 0) {
		/*
		 * no other threads retrieving events ...
		 * check wakeup conditions of sleeping threads
		 */
		if ((portq->portq_thread != NULL) &&
		    (portq->portq_nent >= portq->portq_nget))
			cv_signal(&portq->portq_thread->portget_cv);
	}

	/*
	 * Check PORTQ_POLLIN here because the current thread set temporarily
	 * the number of events in the queue to zero.
	 */
	if (portq->portq_flags & PORTQ_POLLIN) {
		portq->portq_flags &= ~PORTQ_POLLIN;
		mutex_exit(&portq->portq_mutex);
		pollwakeup(&pp->port_pollhd, POLLIN);
	} else {
		mutex_exit(&portq->portq_mutex);
	}

	/* now copyout list of user event structures to user space */
	if (nevents) {
		if (copyout(results, uevp, nevents * eventsz))
			error = EFAULT;
	}
	kmem_free(results, eventsz * nmax);

	if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) {
		/* no events retrieved: check loop conditions */
		if (blocking == -1) {
			/* no timeout checked */
			error = port_get_timeout(pgt->pgt_timeout,
			    &pgt->pgt_rqtime, &rqtp, &blocking, flag);
			if (error) {
				*nget = nevents;
				return (error);
			}
			if (rqtp != NULL) {
				timespec_t	now;
				pgt->pgt_timecheck = timechanged;
				gethrestime(&now);
				timespecadd(&pgt->pgt_rqtime, &now);
			}
			pgt->pgt_rqtp = rqtp;
		} else {
			/* timeout already checked -> remember values */
			pgt->pgt_rqtp = rqtp;
			if (rqtp != NULL) {
				pgt->pgt_timecheck = timecheck;
				pgt->pgt_rqtime = *rqtp;
			}
		}
		if (blocking)
			/* timeout remaining */
			pgt->pgt_loop = 1;
	}

	/* set number of user event structures completed */
	*nget = nevents;
	return (error);
}

/*
 * 1. copy kernel event structure to user event structure.
 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
 * 4. Other types of event structures can be delivered back to the port cache
 *    (port_free_event_local()).
 * 5. The event source callback function is the last opportunity for the
 *    event source to update events, to free local resources associated with
 *    the event or to deny the delivery of the event.
 */
static int
port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list)
{
	int	free_event = 0;
	int	flags;
	int	error;

	/* stage the user-visible copy of the kernel event */
	puevp->portev_source = pkevp->portkev_source;
	puevp->portev_object = pkevp->portkev_object;
	puevp->portev_user = pkevp->portkev_user;
	puevp->portev_events = pkevp->portkev_events;

	/* remove event from the queue */
	list_remove(list, pkevp);

	/*
	 * Events of type PORT_KEV_WIRED remain allocated by the
	 * event source.
	 */
	flags = pkevp->portkev_flags;	/* saved for restore on callback veto */
	if (pkevp->portkev_flags & PORT_KEV_WIRED)
		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
	else
		free_event = 1;

	if (pkevp->portkev_callback) {
		/* last chance for the source to update or deny delivery */
		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
		    &puevp->portev_events, pkevp->portkev_pid,
		    PORT_CALLBACK_DEFAULT, pkevp);

		if (error) {
			/*
			 * Event can not be delivered.
			 * Caller must reinsert the event into the queue.
			 */
			pkevp->portkev_flags = flags;
			return (error);
		}
	}
	if (free_event)
		port_free_event_local(pkevp, 0);
	return (0);
}

#ifdef	_SYSCALL32_IMPL
/*
 * 32-bit variant of port_copy_event():
 * 1. copy kernel event structure to user event structure.
 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
 * 4. Other types of event structures can be delivered back to the port cache
 *    (port_free_event_local()).
 * 5. The event source callback function is the last opportunity for the
 *    event source to update events, to free local resources associated with
 *    the event or to deny the delivery of the event.
 */
static int
port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list)
{
	int	free_event = 0;
	int	error;
	int	flags;

	puevp->portev_source = pkevp->portkev_source;
	/* narrow object/user pointers for the 32-bit data model */
	puevp->portev_object = (daddr32_t)pkevp->portkev_object;
	puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user;
	puevp->portev_events = pkevp->portkev_events;

	/* remove event from the queue */
	list_remove(list, pkevp);

	/*
	 * Events of type PORT_KEV_WIRED remain allocated by the
	 * sub-system (source).
	 */
	flags = pkevp->portkev_flags;	/* saved for restore on callback veto */
	if (pkevp->portkev_flags & PORT_KEV_WIRED)
		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
	else
		free_event = 1;

	if (pkevp->portkev_callback != NULL) {
		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
		    &puevp->portev_events, pkevp->portkev_pid,
		    PORT_CALLBACK_DEFAULT, pkevp);
		if (error) {
			/*
			 * Event can not be delivered.
			 * Caller must reinsert the event into the queue.
			 */
			pkevp->portkev_flags = flags;
			return (error);
		}
	}
	if (free_event)
		port_free_event_local(pkevp, 0);
	return (0);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * copyout alert event.
 * Builds a PORT_SOURCE_ALERT user event from the stored alert data and
 * copies it out to uevp in the caller's data model.
 * Returns 0 on success or EFAULT if the copyout fails.
 */
static int
port_get_alert(port_alert_t *pa, port_event_t *uevp)
{
	model_t	model = get_udatamodel();

	/* copyout alert event structures to user space */
	if (model == DATAMODEL_NATIVE) {
		port_event_t uev;
		uev.portev_source = PORT_SOURCE_ALERT;
		uev.portev_object = pa->portal_object;
		uev.portev_events = pa->portal_events;
		uev.portev_user = pa->portal_user;
		if (copyout(&uev, uevp, sizeof (port_event_t)))
			return (EFAULT);
#ifdef	_SYSCALL32_IMPL
	} else {
		port_event32_t uev32;
		uev32.portev_source = PORT_SOURCE_ALERT;
		uev32.portev_object = (daddr32_t)pa->portal_object;
		uev32.portev_events = pa->portal_events;
		uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user;
		if (copyout(&uev32, uevp, sizeof (port_event32_t)))
			return (EFAULT);
#endif	/* _SYSCALL32_IMPL */
	}
	return (0);
}

/*
 * Check return conditions :
 * - pending port close(2)
 * - threads waiting for events
 * Must be called with portq_mutex held (asserted below); drops the
 * caller from the waiting-thread count before it returns early.
 */
static void
port_check_return_cond(port_queue_t *portq)
{
	ASSERT(MUTEX_HELD(&portq->portq_mutex));
	portq->portq_thrcnt--;
	if (portq->portq_flags & PORTQ_CLOSE) {
		if (portq->portq_thrcnt == 0)
			cv_signal(&portq->portq_closecv);
		else
			cv_signal(&portq->portq_thread->portget_cv);
	}
}

/*
 * The port_get_kevent() function returns
 * - the event located at the head of the queue if 'last' pointer is NULL
 * - the next event after the event pointed by 'last'
 * The caller of this function is responsible for the integrity of the queue
 * in use:
 * - port_getn() is using a temporary queue protected with port_block().
 * - port_close_events() is working on the global event queue and protects
 *   the queue with portq->portq_mutex.
 */
port_kevent_t *
port_get_kevent(list_t *list, port_kevent_t *last)
{
	if (last == NULL)
		return (list_head(list));
	else
		return (list_next(list, last));
}

/*
 * The port_get_timeout() function gets the timeout data from user space
 * and converts that info into a corresponding internal representation.
 * The kerneldata flag means that the timeout data is already loaded.
 * On return:
 * - *blocking is 1 for an (in)finite wait, 0 for an immediate return.
 * - *rqtp points to *rqtime when a finite timeout is in effect, else NULL.
 * Returns 0, EFAULT (bad user pointer) or EINVAL (bad timespec values).
 */
static int
port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp,
    int *blocking, int kerneldata)
{
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timeout == NULL) {
		/* NULL timeout => wait indefinitely */
		*blocking = 1;
		return (0);
	}

	if (kerneldata) {
		*rqtime = *timeout;
	} else {
		if (model == DATAMODEL_NATIVE) {
			if (copyin(timeout, rqtime, sizeof (*rqtime)))
				return (EFAULT);
#ifdef	_SYSCALL32_IMPL
		} else {
			timespec32_t	wait_time_32;
			if (copyin(timeout, &wait_time_32,
			    sizeof (wait_time_32)))
				return (EFAULT);
			TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
#endif	/* _SYSCALL32_IMPL */
		}
	}

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
		/* zero timeout => poll, do not block */
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;
	return (0);
}

/*
 * port_queue_thread()
 * Threads requiring more events than available will be put in a wait queue.
 * There is a "thread wait queue" per port.
 * Threads requiring less events get a higher priority than others and they
 * will be awoken first.
 * The queue is a circular doubly-linked list headed by portq_thread,
 * kept sorted by ascending portget_nget; portq_nget mirrors the head's
 * request size. Returns the newly allocated, enqueued portget_t.
 */
static portget_t *
port_queue_thread(port_queue_t *portq, uint_t nget)
{
	portget_t	*pgetp;
	portget_t	*ttp;
	portget_t	*htp;

	pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP);
	pgetp->portget_nget = nget;
	pgetp->portget_pid = curproc->p_pid;
	if (portq->portq_thread == NULL) {
		/* first waiting thread: becomes a one-element circular list */
		portq->portq_thread = pgetp;
		portq->portq_nget = nget;
		pgetp->portget_prev = pgetp;
		pgetp->portget_next = pgetp;
		return (pgetp);
	}

	/*
	 * thread waiting for less events will be set on top of the queue.
	 * Walk until a waiter with an equal or larger request is found,
	 * or the end of the circular list is reached.
	 */
	ttp = portq->portq_thread;
	htp = ttp;
	for (;;) {
		if (nget <= ttp->portget_nget)
			break;
		if (htp == ttp->portget_next)
			break;	/* last event */
		ttp = ttp->portget_next;
	}

	/* add thread to the queue (insert before ttp) */
	pgetp->portget_next = ttp;
	pgetp->portget_prev = ttp->portget_prev;
	ttp->portget_prev->portget_next = pgetp;
	ttp->portget_prev = pgetp;
	if (portq->portq_thread == ttp)
		portq->portq_thread = pgetp;
	portq->portq_nget = portq->portq_thread->portget_nget;
	return (pgetp);
}

/*
 * Take thread out of the queue.
 * Unlinks pgetp from the circular waiter list, updates the list head and
 * portq_nget if needed, and frees the portget_t.
 */
static void
port_dequeue_thread(port_queue_t *portq, portget_t *pgetp)
{
	if (pgetp->portget_next == pgetp) {
		/* last (single) waiting thread */
		portq->portq_thread = NULL;
		portq->portq_nget = 0;
	} else {
		pgetp->portget_prev->portget_next = pgetp->portget_next;
		pgetp->portget_next->portget_prev = pgetp->portget_prev;
		if (portq->portq_thread == pgetp)
			portq->portq_thread = pgetp->portget_next;
		portq->portq_nget = portq->portq_thread->portget_nget;
	}
	kmem_free(pgetp, sizeof (portget_t));
}

/*
 * Set up event port kstats.
 * Creates a virtual named kstat backed directly by port_kstat; a failed
 * kstat_create() is tolerated (no kstats are published in that case).
 */
static void
port_kstat_init()
{
	kstat_t	*ksp;
	uint_t	ndata;

	ndata = sizeof (port_kstat) / sizeof (kstat_named_t);
	ksp = kstat_create("portfs", 0, "Event Ports", "misc",
	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = &port_kstat;
		kstat_install(ksp);
	}
}