1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright (c) 2015 Joyent, Inc. All rights reserved. 29 * Copyright 2022 Oxide Computer Company 30 */ 31 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/modctl.h> 36 #include <sys/vfs.h> 37 #include <sys/vfs_opreg.h> 38 #include <sys/sysmacros.h> 39 #include <sys/cmn_err.h> 40 #include <sys/stat.h> 41 #include <sys/errno.h> 42 #include <sys/kmem.h> 43 #include <sys/file.h> 44 #include <sys/kstat.h> 45 #include <sys/port_impl.h> 46 #include <sys/task.h> 47 #include <sys/project.h> 48 49 /* 50 * Event Ports can be shared across threads or across processes. 51 * Every thread/process can use an own event port or a group of them 52 * can use a single port. A major request was also to get the ability 53 * to submit user-defined events to a port. The idea of the 54 * user-defined events is to use the event ports for communication between 55 * threads/processes (like message queues). 
User-defined events are queued 56 * in a port with the same priority as other event types. 57 * 58 * Events are delivered only once. The thread/process which is waiting 59 * for events with the "highest priority" (priority here is related to the 60 * internal strategy to wake up waiting threads) will retrieve the event, 61 * all other threads/processes will not be notified. There is also 62 * the requirement to have events which should be submitted immediately 63 * to all "waiting" threads. That is the main task of the alert event. 64 * The alert event is submitted by the application to a port. The port 65 * changes from a standard mode to the alert mode. Now all waiting threads 66 * will be awakened immediately and they will return with the alert event. 67 * Threads trying to retrieve events from a port in alert mode will 68 * return immediately with the alert event. 69 * 70 * 71 * An event port is like a kernel queue, which accepts events submitted from 72 * user level as well as events submitted from kernel sub-systems. Sub-systems 73 * able to submit events to a port are the so-called "event sources". 74 * Current event sources: 75 * PORT_SOURCE_AIO : events submitted per transaction completion from 76 * POSIX-I/O framework. 77 * PORT_SOURCE_TIMER : events submitted when a timer fires 78 * (see timer_create(3RT)). 79 * PORT_SOURCE_FD : events submitted per file descriptor (see poll(2)). 80 * PORT_SOURCE_ALERT : events submitted from user. This is not really a 81 * single event, this is actually a port mode 82 * (see port_alert(3c)). 83 * PORT_SOURCE_USER : events submitted by applications with 84 * port_send(3c) or port_sendn(3c). 85 * PORT_SOURCE_FILE : events submitted per file being watched for file 86 * change events (see port_create(3c)). 87 * 88 * There is a user API implemented in the libc library as well as a 89 * kernel API implemented in port_subr.c in genunix. 
90 * The available user API functions are: 91 * port_create() : create a port as a file descriptor of portfs file system 92 * The standard close(2) function closes a port. 93 * port_associate() : associate a file descriptor with a port to be able to 94 * retrieve events from that file descriptor. 95 * port_dissociate(): remove the association of a file descriptor with a port. 96 * port_alert() : set/unset a port in alert mode 97 * port_send() : send an event of type PORT_SOURCE_USER to a port 98 * port_sendn() : send an event of type PORT_SOURCE_USER to a list of ports 99 * port_get() : retrieve a single event from a port 100 * port_getn() : retrieve a list of events from a port 101 * 102 * The available kernel API functions are: 103 * port_allocate_event(): allocate an event slot/structure of/from a port 104 * port_init_event() : set event data in the event structure 105 * port_send_event() : send event to a port 106 * port_free_event() : deliver allocated slot/structure back to a port 107 * port_associate_ksource(): associate a kernel event source with a port 108 * port_dissociate_ksource(): dissociate a kernel event source from a port 109 * 110 * The libc implementation consists of small functions which pass the 111 * arguments to the kernel using the "portfs" system call. It means, all the 112 * synchronisation work is being done in the kernel. The "portfs" system 113 * call loads the portfs file system into the kernel. 114 * 115 * PORT CREATION 116 * The first function to be used is port_create() which internally creates 117 * a vnode and a portfs node. The portfs node is represented by the port_t 118 * structure, which again includes all the data necessary to control a port. 119 * port_create() returns a file descriptor, which needs to be used in almost 120 * all other event port functions. 121 * The maximum number of ports per system is controlled by the resource 122 * control: project:port-max-ids. 
123 * 124 * EVENT GENERATION 125 * The second step is the triggering of events, which could be sent to a port. 126 * Every event source implements its own method to generate events for a port: 127 * PORT_SOURCE_AIO: 128 * The sigevent structure of the standard POSIX-IO functions 129 * was extended by an additional notification type. 130 * Standard notification types: 131 * SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD 132 * Event ports now introduce SIGEV_PORT. 133 * The notification type SIGEV_PORT specifies that a structure 134 * of type port_notify_t has to be attached to the sigev_value. 135 * The port_notify_t structure contains the event port file 136 * descriptor and a user-defined pointer. 137 * Internally the AIO implementation will use the kernel API 138 * functions to allocate an event port slot per transaction (aiocb) 139 * and send the event to the port as soon as the transaction completes. 140 * All the events submitted per transaction are of type 141 * PORT_SOURCE_AIO. 142 * PORT_SOURCE_TIMER: 143 * The timer_create() function uses the same method as the 144 * PORT_SOURCE_AIO event source. It also uses the sigevent structure 145 * to deliver the port information. 146 * Internally the timer code will allocate a single event slot/struct 147 * per timer and it will send the timer event as soon as the timer 148 * fires. If the timer-fired event is not delivered to the application 149 * before the next period elapsed, then an overrun counter will be 150 * incremented. The timer event source uses a callback function to 151 * detect the delivery of the event to the application. At that time 152 * the timer callback function will update the event overrun counter. 153 * PORT_SOURCE_FD: 154 * This event source uses the port_associate() function to allocate 155 * an event slot/struct from a port. The application defines in the 156 * events argument of port_associate() the type of events which it is 157 * interested in. 
158 * The internal pollwakeup() function is used by all the file 159 * systems --which are supporting the VOP_POLL() interface- to notify 160 * the upper layer (poll(2), devpoll(4D) and now event ports) about 161 * the event triggered (see valid events in poll(2)). 162 * The pollwakeup() function forwards the event to the layer registered 163 * to receive the current event. 164 * The port_dissociate() function can be used to free the allocated 165 * event slot from the port. Anyway, file descriptors deliver events 166 * only one time and remain deactivated until the application 167 * reactivates the association of a file descriptor with port_associate(). 168 * If an associated file descriptor is closed then the file descriptor 169 * will be dissociated automatically from the port. 170 * 171 * PORT_SOURCE_ALERT: 172 * This event type is generated when the port was previously set in 173 * alert mode using the port_alert() function. 174 * A single alert event is delivered to every thread which tries to 175 * retrieve events from a port. 176 * PORT_SOURCE_USER: 177 * This type of event is generated from user level using the port_send() 178 * function to send a user event to a port or the port_sendn() function 179 * to send an event to a list of ports. 180 * PORT_SOURCE_FILE: 181 * This event source uses the port_associate() interface to register 182 * a file to be monitored for changes. The file name that needs to be 183 * monitored is specified in the file_obj_t structure, a pointer to which 184 * is passed as an argument. The event types to be monitored are specified 185 * in the events argument. 186 * A file events monitor is represented internal per port per object 187 * address(the file_obj_t pointer). Which means there can be multiple 188 * watches registered on the same file using different file_obj_t 189 * structure pointer. With the help of the FEM(File Event Monitoring) 190 * hooks, the file's vnode ops are intercepted and relevant events 191 * delivered. 
The port_dissociate() function is used to de-register a 192 * file events monitor on a file. When the specified file is 193 * removed/renamed, the file events watch/monitor is automatically 194 * removed. 195 * 196 * EVENT DELIVERY / RETRIEVING EVENTS 197 * Events remain in the port queue until: 198 * - the application uses port_get() or port_getn() to retrieve events, 199 * - the event source cancel the event, 200 * - the event port is closed or 201 * - the process exits. 202 * The maximal number of events in a port queue is the maximal number 203 * of event slots/structures which can be allocated by event sources. 204 * The allocation of event slots/structures is controlled by the resource 205 * control: process.port-max-events. 206 * The port_get() function retrieves a single event and the port_getn() 207 * function retrieves a list of events. 208 * Events are classified as shareable and non-shareable events across processes. 209 * Non-shareable events are invisible for the port_get(n)() functions of 210 * processes other than the owner of the event. 211 * Shareable event types are: 212 * PORT_SOURCE_USER events 213 * This type of event is unconditionally shareable and without 214 * limitations. If the parent process sends a user event and closes 215 * the port afterwards, the event remains in the port and the child 216 * process will still be able to retrieve the user event. 217 * PORT_SOURCE_ALERT events 218 * This type of event is shareable between processes. 219 * Limitation: The alert mode of the port is removed if the owner 220 * (process which set the port in alert mode) of the 221 * alert event closes the port. 222 * PORT_SOURCE_FD events 223 * This type of event is conditional shareable between processes. 224 * After fork(2) all forked file descriptors are shareable between 225 * the processes. The child process is allowed to retrieve events 226 * from the associated file descriptors and it can also re-associate 227 * the fd with the port. 
228 * Limitations: The child process is not allowed to dissociate 229 * the file descriptor from the port. Only the 230 * owner (process) of the association is allowed to 231 * dissociate the file descriptor from the port. 232 * If the owner of the association closes the port 233 * the association will be removed. 234 * PORT_SOURCE_AIO events 235 * This type of event is not shareable between processes. 236 * PORT_SOURCE_TIMER events 237 * This type of event is not shareable between processes. 238 * PORT_SOURCE_FILE events 239 * This type of event is not shareable between processes. 240 * 241 * FORK BEHAVIOUR 242 * On fork(2) the child process inherits all opened file descriptors from 243 * the parent process. This is also valid for port file descriptors. 244 * Associated file descriptors with a port maintain the association across the 245 * fork(2). It means, the child process gets full access to the port and 246 * it can retrieve events from all common associated file descriptors. 247 * Events of file descriptors created and associated with a port after the 248 * fork(2) are non-shareable and can only be retrieved by the same process. 249 * 250 * If the parent or the child process closes an exported port (using fork(2) 251 * or I_SENDFD) all the file descriptors associated with the port by the 252 * process will be dissociated from the port. Events of dissociated file 253 * descriptors as well as all non-shareable events will be discarded. 254 * The other process can continue working with the port as usual. 255 * 256 * CLOSING A PORT 257 * close(2) has to be used to close a port. See FORK BEHAVIOUR for details. 258 * 259 * PORT EVENT STRUCTURES 260 * The global control structure of the event ports framework is port_control_t. 261 * port_control_t keeps track of the number of created ports in the system. 262 * The cache of the port event structures is also located in port_control_t. 263 * 264 * On port_create() the vnode and the portfs node is also created. 
265 * The portfs node is represented by the port_t structure. 266 * The port_t structure manages all port specific tasks: 267 * - management of resource control values 268 * - port VOP_POLL interface 269 * - creation time 270 * - uid and gid of the port 271 * 272 * The port_t structure contains the port_queue_t structure. 273 * The port_queue_t structure contains all the data necessary for the 274 * queue management: 275 * - locking 276 * - condition variables 277 * - event counters 278 * - submitted events (represented by port_kevent_t structures) 279 * - threads waiting for event delivery (check portget_t structure) 280 * - PORT_SOURCE_FD cache (managed by the port_fdcache_t structure) 281 * - event source management (managed by the port_source_t structure) 282 * - alert mode management (check port_alert_t structure) 283 * 284 * EVENT MANAGEMENT 285 * The event port file system creates a kmem_cache for internal allocation of 286 * event port structures. 287 * 288 * 1. Event source association with a port: 289 * The first step to do for event sources is to get associated with a port 290 * using the port_associate_ksource() function or adding an entry to the 291 * port_ksource_tab[]. An event source can get dissociated from a port 292 * using the port_dissociate_ksource() function. An entry in the 293 * port_ksource_tab[] implies that the source will be associated 294 * automatically with every new created port. 295 * The event source can deliver a callback function, which is used by the 296 * port to notify the event source about close(2). The idea is that 297 * in such a case the event source should free all allocated resources 298 * and it must return to the port all allocated slots/structures. 299 * The port_close() function will wait until all allocated event 300 * structures/slots are returned to the port. 
301 * The callback function is not necessary when the event source does not 302 * maintain local resources, a second condition is that the event source 303 * can guarantee that allocated event slots will be returned without 304 * delay to the port (it will not block and sleep somewhere). 305 * 306 * 2. Reservation of an event slot / event structure 307 * The event port reliability is based on the reservation of an event "slot" 308 * (allocation of an event structure) by the event source as part of the 309 * application call. If the maximal number of event slots is exhausted then 310 * the event source can return a corresponding error code to the application. 311 * 312 * The port_alloc_event() function has to be used by event sources to 313 * allocate an event slot (reserve an event structure). The port_alloc_event() 314 * does not block and it will return a 0 value on success or an error code 315 * if it fails. 316 * An argument of port_alloc_event() is a flag which determines the behavior 317 * of the event after it was delivered to the application: 318 * PORT_ALLOC_DEFAULT : event slot becomes free after delivery to the 319 * application. 320 * PORT_ALLOC_PRIVATE : event slot remains under the control of the event 321 * source. This kind of slot cannot be used for 322 * event delivery and should only be used internally 323 * by the event source. 324 * PORT_KEV_CACHED : event slot remains under the control of an event 325 * port cache. It does not become free after delivery 326 * to the application. 327 * PORT_ALLOC_SCACHED : event slot remains under the control of the event 328 * source. The event source takes the control over 329 * the slot after the event is delivered to the 330 * application. 331 * 332 * 3. Delivery of events to the event port 333 * Earlier allocated event structure/slot has to be used to deliver 334 * event data to the port. Event source has to use the function 335 * port_send_event(). 
The single argument is a pointer to the previously 336 * reserved event structure/slot. 337 * The portkev_events field of the port_kevent_t structure can be updated/set 338 * in two ways: 339 * 1. using the port_set_event() function, or 340 * 2. updating the portkev_events field out of the callback function: 341 * The event source can deliver a callback function to the port as an 342 * argument of port_init_event(). 343 * One of the arguments of the callback function is a pointer to the 344 * events field, which will be delivered to the application. 345 * (see Delivery of events to the application). 346 * Event structures/slots can be delivered to the event port only one time, 347 * they remain blocked until the data is delivered to the application and the 348 * slot becomes free or it is delivered back to the event source 349 * (PORT_ALLOC_SCACHED). The activation of the callback function mentioned above 350 * is at the same time the indicator for the event source that the event 351 * structure/slot is free for reuse. 352 * 353 * 4. Delivery of events to the application 354 * The events structures/slots delivered by event sources remain in the 355 * port queue until they are retrieved by the application or the port 356 * is closed (exit(2) also closes all opened file descriptors). 357 * The application uses port_get() or port_getn() to retrieve events from 358 * a port. port_get() retrieves a single event structure/slot and port_getn() 359 * retrieves a list of event structures/slots. 360 * Both functions are able to poll for events and return immediately or they 361 * can specify a timeout value. 362 * Before the events are delivered to the application they are moved to a 363 * second temporary internal queue. The idea is to avoid lock collisions or 364 * contentions of the global queue lock. 365 * The global queue lock is used every time when an event source delivers 366 * new events to the port. 
367 * The port_get() and port_getn() functions 368 * a) retrieve single events from the temporary queue, 369 * b) prepare the data to be passed to the application memory, 370 * c) activate the callback function of the event sources: 371 * - to get the latest event data, 372 * - the event source can free all allocated resources associated with the 373 * current event, 374 * - the event source can re-use the current event slot/structure 375 * - the event source can deny the delivery of the event to the application 376 * (e.g. because of the wrong process). 377 * d) put the event back to the temporary queue if the event delivery was denied 378 * e) repeat a) until d) as long as there are events in the queue and 379 * there is enough user space available. 380 * 381 * The loop described above could block for a very long time the global mutex, 382 * to avoid that a second mutex was introduced to synchronized concurrent 383 * threads accessing the temporary queue. 384 */ 385 386 static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, 387 uintptr_t); 388 389 static struct sysent port_sysent = { 390 6, 391 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 392 (int (*)())(uintptr_t)portfs, 393 }; 394 395 static struct modlsys modlsys = { 396 &mod_syscallops, "event ports", &port_sysent 397 }; 398 399 #ifdef _SYSCALL32_IMPL 400 401 static int64_t 402 portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4, 403 uint32_t arg5, uint32_t arg6); 404 405 static struct sysent port_sysent32 = { 406 6, 407 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 408 (int (*)())(uintptr_t)portfs32, 409 }; 410 411 static struct modlsys modlsys32 = { 412 &mod_syscallops32, 413 "32-bit event ports syscalls", 414 &port_sysent32 415 }; 416 #endif /* _SYSCALL32_IMPL */ 417 418 static struct modlinkage modlinkage = { 419 MODREV_1, 420 &modlsys, 421 #ifdef _SYSCALL32_IMPL 422 &modlsys32, 423 #endif 424 NULL 425 }; 426 427 port_kstat_t port_kstat = { 428 { "ports", KSTAT_DATA_UINT32 } 429 }; 430 431 dev_t 
portdev; 432 struct vnodeops *port_vnodeops; 433 struct vfs port_vfs; 434 435 extern rctl_hndl_t rc_process_portev; 436 extern rctl_hndl_t rc_project_portids; 437 extern void aio_close_port(void *, int, pid_t, int); 438 439 /* 440 * This table contains a list of event sources which need a static 441 * association with a port (every port). 442 * The last NULL entry in the table is required to detect "end of table". 443 */ 444 struct port_ksource port_ksource_tab[] = { 445 {PORT_SOURCE_AIO, aio_close_port, NULL, NULL}, 446 {0, NULL, NULL, NULL} 447 }; 448 449 /* local functions */ 450 static int port_getn(port_t *, port_event_t *, uint_t, uint_t *, 451 port_gettimer_t *); 452 static int port_sendn(int [], int [], uint_t, int, void *, uint_t *); 453 static int port_alert(port_t *, int, int, void *); 454 static int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *); 455 static int port_send(port_t *, int, int, void *); 456 static int port_create(int *); 457 static int port_get_alert(port_alert_t *, port_event_t *); 458 static int port_copy_event(port_event_t *, port_kevent_t *, list_t *); 459 static int *port_errorn(int *, int, int, int); 460 static int port_noshare(void *, int *, pid_t, int, void *); 461 static int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *, 462 int); 463 static void port_init(port_t *); 464 static void port_remove_alert(port_queue_t *); 465 static void port_add_ksource_local(port_t *, port_ksource_t *); 466 static void port_check_return_cond(port_queue_t *); 467 static void port_dequeue_thread(port_queue_t *, portget_t *); 468 static portget_t *port_queue_thread(port_queue_t *, uint_t); 469 static void port_kstat_init(void); 470 471 #ifdef _SYSCALL32_IMPL 472 static int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *); 473 #endif 474 475 int 476 _init(void) 477 { 478 static const fs_operation_def_t port_vfsops_template[] = { 479 NULL, NULL 480 }; 481 extern const fs_operation_def_t 
port_vnodeops_template[]; 482 vfsops_t *port_vfsops; 483 int error; 484 major_t major; 485 486 if ((major = getudev()) == (major_t)-1) 487 return (ENXIO); 488 portdev = makedevice(major, 0); 489 490 /* Create a dummy vfs */ 491 error = vfs_makefsops(port_vfsops_template, &port_vfsops); 492 if (error) { 493 cmn_err(CE_WARN, "port init: bad vfs ops"); 494 return (error); 495 } 496 vfs_setops(&port_vfs, port_vfsops); 497 port_vfs.vfs_flag = VFS_RDONLY; 498 port_vfs.vfs_dev = portdev; 499 vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0); 500 501 error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops); 502 if (error) { 503 vfs_freevfsops(port_vfsops); 504 cmn_err(CE_WARN, "port init: bad vnode ops"); 505 return (error); 506 } 507 508 mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL); 509 port_control.pc_nents = 0; /* number of active ports */ 510 511 /* create kmem_cache for port event structures */ 512 port_control.pc_cache = kmem_cache_create("port_cache", 513 sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 514 515 port_kstat_init(); /* init port kstats */ 516 return (mod_install(&modlinkage)); 517 } 518 519 int 520 _info(struct modinfo *modinfop) 521 { 522 return (mod_info(&modlinkage, modinfop)); 523 } 524 525 /* 526 * System call wrapper for all port related system calls from 32-bit programs. 527 */ 528 #ifdef _SYSCALL32_IMPL 529 static int64_t 530 portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3, 531 uint32_t a4) 532 { 533 int64_t error; 534 535 switch (opcode & PORT_CODE_MASK) { 536 case PORT_GET: 537 error = portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4); 538 break; 539 case PORT_SENDN: 540 error = portfs(opcode, (uint32_t)a0, a1, a2, a3, a4); 541 break; 542 default: 543 error = portfs(opcode, a0, a1, a2, a3, a4); 544 break; 545 } 546 return (error); 547 } 548 #endif /* _SYSCALL32_IMPL */ 549 550 /* 551 * System entry point for port functions. 
552 * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE). 553 * The libc uses PORT_SYS_NOPORT in functions which do not deliver a 554 * port file descriptor as first argument. 555 */ 556 static int64_t 557 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, 558 uintptr_t a4) 559 { 560 rval_t r; 561 port_t *pp; 562 int error = 0; 563 uint_t nget; 564 file_t *fp; 565 port_gettimer_t port_timer; 566 567 r.r_vals = 0; 568 if (opcode & PORT_SYS_NOPORT) { 569 opcode &= PORT_CODE_MASK; 570 if (opcode == PORT_SENDN) { 571 error = port_sendn((int *)a0, (int *)a1, (uint_t)a2, 572 (int)a3, (void *)a4, (uint_t *)&r.r_val1); 573 if (error && (error != EIO)) 574 return ((int64_t)set_errno(error)); 575 return (r.r_vals); 576 } 577 578 if (opcode == PORT_CREATE) { 579 error = port_create(&r.r_val1); 580 if (error) 581 return ((int64_t)set_errno(error)); 582 return (r.r_vals); 583 } 584 } 585 586 /* opcodes using port as first argument (a0) */ 587 588 if ((fp = getf((int)a0)) == NULL) 589 return ((uintptr_t)set_errno(EBADF)); 590 591 if (fp->f_vnode->v_type != VPORT) { 592 releasef((int)a0); 593 return ((uintptr_t)set_errno(EBADFD)); 594 } 595 596 pp = VTOEP(fp->f_vnode); 597 598 switch (opcode & PORT_CODE_MASK) { 599 case PORT_GET: 600 { 601 /* see PORT_GETN description */ 602 struct timespec timeout; 603 604 port_timer.pgt_flags = PORTGET_ONE; 605 port_timer.pgt_loop = 0; 606 port_timer.pgt_rqtp = NULL; 607 if (a4 != 0) { 608 port_timer.pgt_timeout = &timeout; 609 timeout.tv_sec = (time_t)a2; 610 timeout.tv_nsec = (long)a3; 611 } else { 612 port_timer.pgt_timeout = NULL; 613 } 614 do { 615 nget = 1; 616 error = port_getn(pp, (port_event_t *)a1, 1, 617 (uint_t *)&nget, &port_timer); 618 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 619 break; 620 } 621 case PORT_GETN: 622 { 623 /* 624 * port_getn() can only retrieve own or shareable events from 625 * other processes. 
The port_getn() function remains in the 626 * kernel until own or shareable events are available or the 627 * timeout elapses. 628 */ 629 port_timer.pgt_flags = 0; 630 port_timer.pgt_loop = 0; 631 port_timer.pgt_rqtp = NULL; 632 port_timer.pgt_timeout = (struct timespec *)a4; 633 do { 634 nget = a3; 635 error = port_getn(pp, (port_event_t *)a1, (uint_t)a2, 636 (uint_t *)&nget, &port_timer); 637 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 638 r.r_val1 = nget; 639 r.r_val2 = error; 640 releasef((int)a0); 641 if (error && error != ETIME) 642 return ((int64_t)set_errno(error)); 643 return (r.r_vals); 644 } 645 case PORT_ASSOCIATE: 646 { 647 switch ((int)a1) { 648 case PORT_SOURCE_FD: 649 error = port_associate_fd(pp, (int)a1, (uintptr_t)a2, 650 (int)a3, (void *)a4); 651 break; 652 case PORT_SOURCE_FILE: 653 error = port_associate_fop(pp, (int)a1, (uintptr_t)a2, 654 (int)a3, (void *)a4); 655 break; 656 default: 657 error = EINVAL; 658 break; 659 } 660 break; 661 } 662 case PORT_SEND: 663 { 664 /* user-defined events */ 665 error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2); 666 break; 667 } 668 case PORT_DISPATCH: 669 { 670 /* 671 * library events, blocking 672 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ 673 * are currently allowed. 
674 */ 675 if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) { 676 error = EINVAL; 677 break; 678 } 679 error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2, 680 (uintptr_t)a3, (void *)a4); 681 break; 682 } 683 case PORT_DISSOCIATE: 684 { 685 switch ((int)a1) { 686 case PORT_SOURCE_FD: 687 error = port_dissociate_fd(pp, (uintptr_t)a2); 688 break; 689 case PORT_SOURCE_FILE: 690 error = port_dissociate_fop(pp, (uintptr_t)a2); 691 break; 692 default: 693 error = EINVAL; 694 break; 695 } 696 break; 697 } 698 case PORT_ALERT: 699 { 700 if ((int)a2) /* a2 = events */ 701 error = port_alert(pp, (int)a1, (int)a2, (void *)a3); 702 else 703 port_remove_alert(&pp->port_queue); 704 break; 705 } 706 default: 707 error = EINVAL; 708 break; 709 } 710 711 releasef((int)a0); 712 if (error) 713 return ((int64_t)set_errno(error)); 714 return (r.r_vals); 715 } 716 717 /* 718 * System call to create a port. 719 * 720 * The port_create() function creates a vnode of type VPORT per port. 721 * The port control data is associated with the vnode as vnode private data. 722 * The port_create() function returns an event port file descriptor. 723 */ 724 static int 725 port_create(int *fdp) 726 { 727 port_t *pp; 728 vnode_t *vp; 729 struct file *fp; 730 proc_t *p = curproc; 731 732 /* initialize vnode and port private data */ 733 pp = kmem_zalloc(sizeof (port_t), KM_SLEEP); 734 735 pp->port_vnode = vn_alloc(KM_SLEEP); 736 vp = EPTOV(pp); 737 vn_setops(vp, port_vnodeops); 738 vp->v_type = VPORT; 739 vp->v_vfsp = &port_vfs; 740 vp->v_data = (caddr_t)pp; 741 742 mutex_enter(&port_control.pc_mutex); 743 /* 744 * Retrieve the maximal number of event ports allowed per system from 745 * the resource control: project.port-max-ids. 
746 */ 747 mutex_enter(&p->p_lock); 748 if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p, 749 port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) { 750 mutex_exit(&p->p_lock); 751 vn_free(vp); 752 kmem_free(pp, sizeof (port_t)); 753 mutex_exit(&port_control.pc_mutex); 754 return (EAGAIN); 755 } 756 757 /* 758 * Retrieve the maximal number of events allowed per port from 759 * the resource control: process.port-max-events. 760 */ 761 pp->port_max_events = rctl_enforced_value(rc_process_portev, 762 p->p_rctls, p); 763 mutex_exit(&p->p_lock); 764 765 /* allocate a new user file descriptor and a file structure */ 766 if (falloc(vp, 0, &fp, fdp)) { 767 /* 768 * If the file table is full, free allocated resources. 769 */ 770 vn_free(vp); 771 kmem_free(pp, sizeof (port_t)); 772 mutex_exit(&port_control.pc_mutex); 773 return (EMFILE); 774 } 775 776 mutex_exit(&fp->f_tlock); 777 778 pp->port_fd = *fdp; 779 port_control.pc_nents++; 780 p->p_portcnt++; 781 port_kstat.pks_ports.value.ui32++; 782 mutex_exit(&port_control.pc_mutex); 783 784 /* initializes port private data */ 785 port_init(pp); 786 /* set user file pointer */ 787 setf(*fdp, fp); 788 return (0); 789 } 790 791 /* 792 * port_init() initializes event port specific data 793 */ 794 static void 795 port_init(port_t *pp) 796 { 797 port_queue_t *portq; 798 port_ksource_t *pks; 799 800 mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL); 801 portq = &pp->port_queue; 802 mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL); 803 pp->port_flags |= PORT_INIT; 804 805 /* 806 * If it is not enough memory available to satisfy a user 807 * request using a single port_getn() call then port_getn() 808 * will reduce the size of the list to PORT_MAX_LIST. 
809 */ 810 pp->port_max_list = port_max_list; 811 812 /* Set timestamp entries required for fstat(2) requests */ 813 gethrestime(&pp->port_ctime); 814 pp->port_uid = crgetuid(curproc->p_cred); 815 pp->port_gid = crgetgid(curproc->p_cred); 816 817 /* initialize port queue structs */ 818 list_create(&portq->portq_list, sizeof (port_kevent_t), 819 offsetof(port_kevent_t, portkev_node)); 820 list_create(&portq->portq_get_list, sizeof (port_kevent_t), 821 offsetof(port_kevent_t, portkev_node)); 822 portq->portq_flags = 0; 823 pp->port_pid = curproc->p_pid; 824 825 /* Allocate cache skeleton for PORT_SOURCE_FD events */ 826 portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP); 827 mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL); 828 portq->portq_pcp->pc_flag = PC_PORTFS; 829 830 /* 831 * Allocate cache skeleton for association of event sources. 832 */ 833 mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL); 834 portq->portq_scache = kmem_zalloc( 835 PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP); 836 837 /* 838 * pre-associate some kernel sources with this port. 839 * The pre-association is required to create port_source_t 840 * structures for object association. 841 * Some sources can not get associated with a port before the first 842 * object association is requested. Another reason to pre_associate 843 * a particular source with a port is because of performance. 844 */ 845 846 for (pks = port_ksource_tab; pks->pks_source != 0; pks++) 847 port_add_ksource_local(pp, pks); 848 } 849 850 /* 851 * The port_add_ksource_local() function is being used to associate 852 * event sources with every new port. 853 * The event sources need to be added to port_ksource_tab[]. 
854 */ 855 static void 856 port_add_ksource_local(port_t *pp, port_ksource_t *pks) 857 { 858 port_source_t *pse; 859 port_source_t **ps; 860 861 mutex_enter(&pp->port_queue.portq_source_mutex); 862 ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)]; 863 for (pse = *ps; pse != NULL; pse = pse->portsrc_next) { 864 if (pse->portsrc_source == pks->pks_source) 865 break; 866 } 867 868 if (pse == NULL) { 869 /* associate new source with the port */ 870 pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP); 871 pse->portsrc_source = pks->pks_source; 872 pse->portsrc_close = pks->pks_close; 873 pse->portsrc_closearg = pks->pks_closearg; 874 pse->portsrc_cnt = 1; 875 876 pks->pks_portsrc = pse; 877 if (*ps != NULL) 878 pse->portsrc_next = (*ps)->portsrc_next; 879 *ps = pse; 880 } 881 mutex_exit(&pp->port_queue.portq_source_mutex); 882 } 883 884 /* 885 * The port_send() function sends an event of type "source" to a 886 * port. This function is non-blocking. An event can be sent to 887 * a port as long as the number of events per port does not achieve the 888 * maximal allowed number of events. The max. number of events per port is 889 * defined by the resource control process.max-port-events. 890 * This function is used by the port library function port_send() 891 * and port_dispatch(). The port_send(3c) function is part of the 892 * event ports API and submits events of type PORT_SOURCE_USER. The 893 * port_dispatch() function is project private and it is used by library 894 * functions to submit events of other types than PORT_SOURCE_USER 895 * (e.g. PORT_SOURCE_AIO). 
896 */ 897 static int 898 port_send(port_t *pp, int source, int events, void *user) 899 { 900 port_kevent_t *pev; 901 int error; 902 903 error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev); 904 if (error) 905 return (error); 906 907 pev->portkev_object = 0; 908 pev->portkev_events = events; 909 pev->portkev_user = user; 910 pev->portkev_callback = NULL; 911 pev->portkev_arg = NULL; 912 pev->portkev_flags = 0; 913 914 port_send_event(pev); 915 return (0); 916 } 917 918 /* 919 * The port_noshare() function returns 0 if the current event was generated 920 * by the same process. Otherwise is returns a value other than 0 and the 921 * event should not be delivered to the current processe. 922 * The port_noshare() function is normally used by the port_dispatch() 923 * function. The port_dispatch() function is project private and can only be 924 * used within the event port project. 925 * Currently the libaio uses the port_dispatch() function to deliver events 926 * of types PORT_SOURCE_AIO. 927 */ 928 /* ARGSUSED */ 929 static int 930 port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp) 931 { 932 if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid) 933 return (1); 934 return (0); 935 } 936 937 /* 938 * The port_dispatch_event() function is project private and it is used by 939 * libraries involved in the project to deliver events to the port. 940 * port_dispatch will sleep and wait for enough resources to satisfy the 941 * request, if necessary. 942 * The library can specify if the delivered event is shareable with other 943 * processes (see PORT_SYS_NOSHARE flag). 
944 */ 945 static int 946 port_dispatch_event(port_t *pp, int opcode, int source, int events, 947 uintptr_t object, void *user) 948 { 949 port_kevent_t *pev; 950 int error; 951 952 error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev); 953 if (error) 954 return (error); 955 956 pev->portkev_object = object; 957 pev->portkev_events = events; 958 pev->portkev_user = user; 959 pev->portkev_arg = NULL; 960 if (opcode & PORT_SYS_NOSHARE) { 961 pev->portkev_flags = PORT_KEV_NOSHARE; 962 pev->portkev_callback = port_noshare; 963 } else { 964 pev->portkev_flags = 0; 965 pev->portkev_callback = NULL; 966 } 967 968 port_send_event(pev); 969 return (0); 970 } 971 972 973 /* 974 * The port_sendn() function is the kernel implementation of the event 975 * port API function port_sendn(3c). 976 * This function is able to send an event to a list of event ports. 977 */ 978 static int 979 port_sendn(int ports[], int errors[], uint_t nent, int events, void *user, 980 uint_t *nget) 981 { 982 port_kevent_t *pev; 983 int errorcnt = 0; 984 int error = 0; 985 int count; 986 int port; 987 int *plist; 988 int *elist = NULL; 989 file_t *fp; 990 port_t *pp; 991 992 if (nent == 0 || nent > port_max_list) 993 return (EINVAL); 994 995 plist = kmem_alloc(nent * sizeof (int), KM_SLEEP); 996 if (copyin((void *)ports, plist, nent * sizeof (int))) { 997 kmem_free(plist, nent * sizeof (int)); 998 return (EFAULT); 999 } 1000 1001 /* 1002 * Scan the list for event port file descriptors and send the 1003 * attached user event data embedded in a event of type 1004 * PORT_SOURCE_USER to every event port in the list. 1005 * If a list entry is not a valid event port then the corresponding 1006 * error code will be stored in the errors[] list with the same 1007 * list offset as in the ports[] list. 
1008 */ 1009 1010 for (count = 0; count < nent; count++) { 1011 port = plist[count]; 1012 if ((fp = getf(port)) == NULL) { 1013 elist = port_errorn(elist, nent, EBADF, count); 1014 errorcnt++; 1015 continue; 1016 } 1017 1018 pp = VTOEP(fp->f_vnode); 1019 if (fp->f_vnode->v_type != VPORT) { 1020 releasef(port); 1021 elist = port_errorn(elist, nent, EBADFD, count); 1022 errorcnt++; 1023 continue; 1024 } 1025 1026 error = port_alloc_event_local(pp, PORT_SOURCE_USER, 1027 PORT_ALLOC_DEFAULT, &pev); 1028 if (error) { 1029 releasef(port); 1030 elist = port_errorn(elist, nent, error, count); 1031 errorcnt++; 1032 continue; 1033 } 1034 1035 pev->portkev_object = 0; 1036 pev->portkev_events = events; 1037 pev->portkev_user = user; 1038 pev->portkev_callback = NULL; 1039 pev->portkev_arg = NULL; 1040 pev->portkev_flags = 0; 1041 1042 port_send_event(pev); 1043 releasef(port); 1044 } 1045 if (errorcnt) { 1046 error = EIO; 1047 if (copyout(elist, (void *)errors, nent * sizeof (int))) 1048 error = EFAULT; 1049 kmem_free(elist, nent * sizeof (int)); 1050 } 1051 *nget = nent - errorcnt; 1052 kmem_free(plist, nent * sizeof (int)); 1053 return (error); 1054 } 1055 1056 static int * 1057 port_errorn(int *elist, int nent, int error, int index) 1058 { 1059 if (elist == NULL) 1060 elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP); 1061 elist[index] = error; 1062 return (elist); 1063 } 1064 1065 /* 1066 * port_alert() 1067 * The port_alert() funcion is a high priority event and it is always set 1068 * on top of the queue. It is also delivered as single event. 
1069 * flags: 1070 * - SET :overwrite current alert data 1071 * - UPDATE:set alert data or return EBUSY if alert mode is already set 1072 * 1073 * - set the ALERT flag 1074 * - wakeup all sleeping threads 1075 */ 1076 static int 1077 port_alert(port_t *pp, int flags, int events, void *user) 1078 { 1079 port_queue_t *portq; 1080 portget_t *pgetp; 1081 port_alert_t *pa; 1082 1083 if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID) 1084 return (EINVAL); 1085 1086 portq = &pp->port_queue; 1087 pa = &portq->portq_alert; 1088 mutex_enter(&portq->portq_mutex); 1089 1090 /* check alert conditions */ 1091 if (flags == PORT_ALERT_UPDATE) { 1092 if (portq->portq_flags & PORTQ_ALERT) { 1093 mutex_exit(&portq->portq_mutex); 1094 return (EBUSY); 1095 } 1096 } 1097 1098 /* 1099 * Store alert data in the port to be delivered to threads 1100 * which are using port_get(n) to retrieve events. 1101 */ 1102 1103 portq->portq_flags |= PORTQ_ALERT; 1104 pa->portal_events = events; /* alert info */ 1105 pa->portal_pid = curproc->p_pid; /* process owner */ 1106 pa->portal_object = 0; /* no object */ 1107 pa->portal_user = user; /* user alert data */ 1108 1109 /* alert and deliver alert data to waiting threads */ 1110 pgetp = portq->portq_thread; 1111 if (pgetp == NULL) { 1112 /* no threads waiting for events */ 1113 mutex_exit(&portq->portq_mutex); 1114 return (0); 1115 } 1116 1117 /* 1118 * Set waiting threads in alert mode (PORTGET_ALERT).. 1119 * Every thread waiting for events already allocated a portget_t 1120 * structure to sleep on. 1121 * The port alert arguments are stored in the portget_t structure. 1122 * The PORTGET_ALERT flag is set to indicate the thread to return 1123 * immediately with the alert event. 
1124 */ 1125 do { 1126 if ((pgetp->portget_state & PORTGET_ALERT) == 0) { 1127 pa = &pgetp->portget_alert; 1128 pa->portal_events = events; 1129 pa->portal_object = 0; 1130 pa->portal_user = user; 1131 pgetp->portget_state |= PORTGET_ALERT; 1132 cv_signal(&pgetp->portget_cv); 1133 } 1134 } while ((pgetp = pgetp->portget_next) != portq->portq_thread); 1135 mutex_exit(&portq->portq_mutex); 1136 return (0); 1137 } 1138 1139 /* 1140 * Clear alert state of the port 1141 */ 1142 static void 1143 port_remove_alert(port_queue_t *portq) 1144 { 1145 mutex_enter(&portq->portq_mutex); 1146 portq->portq_flags &= ~PORTQ_ALERT; 1147 mutex_exit(&portq->portq_mutex); 1148 } 1149 1150 /* 1151 * The port_getn() function is used to retrieve events from a port. 1152 * 1153 * The port_getn() function returns immediately if there are enough events 1154 * available in the port to satisfy the request or if the port is in alert 1155 * mode (see port_alert(3c)). 1156 * The timeout argument of port_getn(3c) -which is embedded in the 1157 * port_gettimer_t structure- specifies if the system call should block or if it 1158 * should return immediately depending on the number of events available. 1159 * This function is internally used by port_getn(3c) as well as by 1160 * port_get(3c). 
1161 */ 1162 static int 1163 port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget, 1164 port_gettimer_t *pgt) 1165 { 1166 port_queue_t *portq; 1167 port_kevent_t *pev; 1168 port_kevent_t *lev; 1169 int error = 0; 1170 uint_t nmax; 1171 uint_t nevents; 1172 uint_t eventsz; 1173 port_event_t *kevp; 1174 list_t *glist; 1175 uint_t tnent; 1176 int rval; 1177 int blocking = -1; 1178 int timecheck; 1179 int flag; 1180 timespec_t rqtime; 1181 timespec_t *rqtp = NULL; 1182 portget_t *pgetp; 1183 void *results; 1184 model_t model = get_udatamodel(); 1185 1186 flag = pgt->pgt_flags; 1187 1188 if (*nget > max && max > 0) 1189 return (EINVAL); 1190 1191 portq = &pp->port_queue; 1192 mutex_enter(&portq->portq_mutex); 1193 if (max == 0) { 1194 /* 1195 * Return number of objects with events. 1196 * The port_block() call is required to synchronize this 1197 * thread with another possible thread, which could be 1198 * retrieving events from the port queue. 1199 */ 1200 port_block(portq); 1201 /* 1202 * Check if a second thread is currently retrieving events 1203 * and it is using the temporary event queue. 1204 */ 1205 if (portq->portq_tnent) { 1206 /* put remaining events back to the port queue */ 1207 port_push_eventq(portq); 1208 } 1209 *nget = portq->portq_nent; 1210 port_unblock(portq); 1211 mutex_exit(&portq->portq_mutex); 1212 return (0); 1213 } 1214 1215 if (uevp == NULL) { 1216 mutex_exit(&portq->portq_mutex); 1217 return (EFAULT); 1218 } 1219 if (*nget == 0) { /* no events required */ 1220 mutex_exit(&portq->portq_mutex); 1221 return (0); 1222 } 1223 1224 /* port is being closed ... 
*/ 1225 if (portq->portq_flags & PORTQ_CLOSE) { 1226 mutex_exit(&portq->portq_mutex); 1227 return (EBADFD); 1228 } 1229 1230 /* return immediately if port in alert mode */ 1231 if (portq->portq_flags & PORTQ_ALERT) { 1232 error = port_get_alert(&portq->portq_alert, uevp); 1233 if (error == 0) 1234 *nget = 1; 1235 mutex_exit(&portq->portq_mutex); 1236 return (error); 1237 } 1238 1239 portq->portq_thrcnt++; 1240 1241 /* 1242 * Now check if the completed events satisfy the 1243 * "wait" requirements of the current thread: 1244 */ 1245 1246 if (pgt->pgt_loop) { 1247 /* 1248 * loop entry of same thread 1249 * pgt_loop is set when the current thread returns 1250 * prematurely from this function. That could happen 1251 * when a port is being shared between processes and 1252 * this thread could not find events to return. 1253 * It is not allowed to a thread to retrieve non-shareable 1254 * events generated in other processes. 1255 * PORTQ_WAIT_EVENTS is set when a thread already 1256 * checked the current event queue and no new events 1257 * are added to the queue. 1258 */ 1259 if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) && 1260 (portq->portq_nent >= *nget)) { 1261 /* some new events arrived ...check them */ 1262 goto portnowait; 1263 } 1264 rqtp = pgt->pgt_rqtp; 1265 timecheck = pgt->pgt_timecheck; 1266 pgt->pgt_flags |= PORTGET_WAIT_EVENTS; 1267 } else { 1268 /* check if enough events are available ... */ 1269 if (portq->portq_nent >= *nget) 1270 goto portnowait; 1271 /* 1272 * There are not enough events available to satisfy 1273 * the request, check timeout value and wait for 1274 * incoming events. 
1275 */ 1276 error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp, 1277 &blocking, flag); 1278 if (error) { 1279 port_check_return_cond(portq); 1280 mutex_exit(&portq->portq_mutex); 1281 return (error); 1282 } 1283 1284 if (blocking == 0) /* don't block, check fired events */ 1285 goto portnowait; 1286 1287 if (rqtp != NULL) { 1288 timespec_t now; 1289 timecheck = timechanged; 1290 gethrestime(&now); 1291 timespecadd(rqtp, &now); 1292 } 1293 } 1294 1295 /* enqueue thread in the list of waiting threads */ 1296 pgetp = port_queue_thread(portq, *nget); 1297 1298 1299 /* Wait here until return conditions met */ 1300 for (;;) { 1301 if (pgetp->portget_state & PORTGET_ALERT) { 1302 /* reap alert event and return */ 1303 error = port_get_alert(&pgetp->portget_alert, uevp); 1304 if (error) 1305 *nget = 0; 1306 else 1307 *nget = 1; 1308 port_dequeue_thread(&pp->port_queue, pgetp); 1309 portq->portq_thrcnt--; 1310 mutex_exit(&portq->portq_mutex); 1311 return (error); 1312 } 1313 1314 /* 1315 * Check if some other thread is already retrieving 1316 * events (portq_getn > 0). 1317 */ 1318 1319 if ((portq->portq_getn == 0) && 1320 ((portq)->portq_nent >= *nget) && 1321 (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) || 1322 !((portq)->portq_flags & PORTQ_WAIT_EVENTS))) 1323 break; 1324 1325 if (portq->portq_flags & PORTQ_CLOSE) { 1326 error = EBADFD; 1327 break; 1328 } 1329 1330 rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex, 1331 rqtp, timecheck); 1332 1333 if (rval <= 0) { 1334 error = (rval == 0) ? EINTR : ETIME; 1335 break; 1336 } 1337 } 1338 1339 /* take thread out of the wait queue */ 1340 port_dequeue_thread(portq, pgetp); 1341 1342 if (error != 0 && (error == EINTR || error == EBADFD || 1343 (error == ETIME && flag))) { 1344 /* return without events */ 1345 port_check_return_cond(portq); 1346 mutex_exit(&portq->portq_mutex); 1347 return (error); 1348 } 1349 1350 portnowait: 1351 /* 1352 * Move port event queue to a temporary event queue . 
1353 * New incoming events will be continue be posted to the event queue 1354 * and they will not be considered by the current thread. 1355 * The idea is to avoid lock contentions or an often locking/unlocking 1356 * of the port queue mutex. The contention and performance degradation 1357 * could happen because: 1358 * a) incoming events use the port queue mutex to enqueue new events and 1359 * b) before the event can be delivered to the application it is 1360 * necessary to notify the event sources about the event delivery. 1361 * Sometimes the event sources can require a long time to return and 1362 * the queue mutex would block incoming events. 1363 * During this time incoming events (port_send_event()) do not need 1364 * to awake threads waiting for events. Before the current thread 1365 * returns it will check the conditions to awake other waiting threads. 1366 */ 1367 portq->portq_getn++; /* number of threads retrieving events */ 1368 port_block(portq); /* block other threads here */ 1369 nmax = max < portq->portq_nent ? max : portq->portq_nent; 1370 1371 if (portq->portq_tnent) { 1372 /* 1373 * Move remaining events from previous thread back to the 1374 * port event queue. 
1375 */ 1376 port_push_eventq(portq); 1377 } 1378 /* move port event queue to a temporary queue */ 1379 list_move_tail(&portq->portq_get_list, &portq->portq_list); 1380 glist = &portq->portq_get_list; /* use temporary event queue */ 1381 tnent = portq->portq_nent; /* get current number of events */ 1382 portq->portq_nent = 0; /* no events in the port event queue */ 1383 portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */ 1384 mutex_exit(&portq->portq_mutex); /* event queue can be reused now */ 1385 1386 if (model == DATAMODEL_NATIVE) { 1387 eventsz = sizeof (port_event_t); 1388 1389 if (nmax == 0) { 1390 kevp = NULL; 1391 } else { 1392 kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP); 1393 if (kevp == NULL) { 1394 if (nmax > pp->port_max_list) 1395 nmax = pp->port_max_list; 1396 kevp = kmem_alloc(eventsz * nmax, KM_SLEEP); 1397 } 1398 } 1399 1400 results = kevp; 1401 lev = NULL; /* start with first event in the queue */ 1402 for (nevents = 0; nevents < nmax; ) { 1403 pev = port_get_kevent(glist, lev); 1404 if (pev == NULL) /* no more events available */ 1405 break; 1406 if (pev->portkev_flags & PORT_KEV_FREE) { 1407 /* Just discard event */ 1408 list_remove(glist, pev); 1409 pev->portkev_flags &= ~(PORT_CLEANUP_DONE); 1410 if (PORT_FREE_EVENT(pev)) 1411 port_free_event_local(pev, 0); 1412 tnent--; 1413 continue; 1414 } 1415 1416 /* move event data to copyout list */ 1417 if (port_copy_event(&kevp[nevents], pev, glist)) { 1418 /* 1419 * Event can not be delivered to the 1420 * current process. 
1421 */ 1422 if (lev != NULL) 1423 list_insert_after(glist, lev, pev); 1424 else 1425 list_insert_head(glist, pev); 1426 lev = pev; /* last checked event */ 1427 } else { 1428 nevents++; /* # of events ready */ 1429 } 1430 } 1431 #ifdef _SYSCALL32_IMPL 1432 } else { 1433 port_event32_t *kevp32; 1434 1435 eventsz = sizeof (port_event32_t); 1436 1437 if (nmax == 0) { 1438 kevp32 = NULL; 1439 } else { 1440 kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP); 1441 if (kevp32 == NULL) { 1442 if (nmax > pp->port_max_list) 1443 nmax = pp->port_max_list; 1444 kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP); 1445 } 1446 } 1447 1448 results = kevp32; 1449 lev = NULL; /* start with first event in the queue */ 1450 for (nevents = 0; nevents < nmax; ) { 1451 pev = port_get_kevent(glist, lev); 1452 if (pev == NULL) /* no more events available */ 1453 break; 1454 if (pev->portkev_flags & PORT_KEV_FREE) { 1455 /* Just discard event */ 1456 list_remove(glist, pev); 1457 pev->portkev_flags &= ~(PORT_CLEANUP_DONE); 1458 if (PORT_FREE_EVENT(pev)) 1459 port_free_event_local(pev, 0); 1460 tnent--; 1461 continue; 1462 } 1463 1464 /* move event data to copyout list */ 1465 if (port_copy_event32(&kevp32[nevents], pev, glist)) { 1466 /* 1467 * Event can not be delivered to the 1468 * current process. 1469 */ 1470 if (lev != NULL) 1471 list_insert_after(glist, lev, pev); 1472 else 1473 list_insert_head(glist, pev); 1474 lev = pev; /* last checked event */ 1475 } else { 1476 nevents++; /* # of events ready */ 1477 } 1478 } 1479 #endif /* _SYSCALL32_IMPL */ 1480 } 1481 1482 /* 1483 * Remember number of remaining events in the temporary event queue. 1484 */ 1485 portq->portq_tnent = tnent - nevents; 1486 1487 /* 1488 * Work to do before return : 1489 * - push list of remaining events back to the top of the standard 1490 * port queue. 1491 * - if this is the last thread calling port_get(n) then wakeup the 1492 * thread waiting on close(2). 
1493 * - check for a deferred cv_signal from port_send_event() and wakeup 1494 * the sleeping thread. 1495 */ 1496 1497 mutex_enter(&portq->portq_mutex); 1498 port_unblock(portq); 1499 if (portq->portq_tnent) { 1500 /* 1501 * move remaining events in the temporary event queue back 1502 * to the port event queue 1503 */ 1504 port_push_eventq(portq); 1505 } 1506 portq->portq_getn--; /* update # of threads retrieving events */ 1507 if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */ 1508 /* Last thread => check close(2) conditions ... */ 1509 if (portq->portq_flags & PORTQ_CLOSE) { 1510 cv_signal(&portq->portq_closecv); 1511 mutex_exit(&portq->portq_mutex); 1512 kmem_free(results, eventsz * nmax); 1513 /* do not copyout events */ 1514 *nget = 0; 1515 return (EBADFD); 1516 } 1517 } else if (portq->portq_getn == 0) { 1518 /* 1519 * no other threads retrieving events ... 1520 * check wakeup conditions of sleeping threads 1521 */ 1522 if ((portq->portq_thread != NULL) && 1523 (portq->portq_nent >= portq->portq_nget)) 1524 cv_signal(&portq->portq_thread->portget_cv); 1525 } 1526 1527 /* 1528 * Check PORTQ_POLLIN here because the current thread set temporarily 1529 * the number of events in the queue to zero. 
1530 */ 1531 if (portq->portq_flags & PORTQ_POLLIN) { 1532 portq->portq_flags &= ~PORTQ_POLLIN; 1533 mutex_exit(&portq->portq_mutex); 1534 pollwakeup(&pp->port_pollhd, POLLIN); 1535 } else { 1536 mutex_exit(&portq->portq_mutex); 1537 } 1538 1539 /* now copyout list of user event structures to user space */ 1540 if (nevents) { 1541 if (copyout(results, uevp, nevents * eventsz)) 1542 error = EFAULT; 1543 } 1544 kmem_free(results, eventsz * nmax); 1545 1546 if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) { 1547 /* no events retrieved: check loop conditions */ 1548 if (blocking == -1) { 1549 /* no timeout checked */ 1550 error = port_get_timeout(pgt->pgt_timeout, 1551 &pgt->pgt_rqtime, &rqtp, &blocking, flag); 1552 if (error) { 1553 *nget = nevents; 1554 return (error); 1555 } 1556 if (rqtp != NULL) { 1557 timespec_t now; 1558 pgt->pgt_timecheck = timechanged; 1559 gethrestime(&now); 1560 timespecadd(&pgt->pgt_rqtime, &now); 1561 } 1562 pgt->pgt_rqtp = rqtp; 1563 } else { 1564 /* timeout already checked -> remember values */ 1565 pgt->pgt_rqtp = rqtp; 1566 if (rqtp != NULL) { 1567 pgt->pgt_timecheck = timecheck; 1568 pgt->pgt_rqtime = *rqtp; 1569 } 1570 } 1571 if (blocking) 1572 /* timeout remaining */ 1573 pgt->pgt_loop = 1; 1574 } 1575 1576 /* set number of user event structures completed */ 1577 *nget = nevents; 1578 return (error); 1579 } 1580 1581 /* 1582 * 1. copy kernel event structure to user event structure. 1583 * 2. PORT_KEV_WIRED event structures will be reused by the "source" 1584 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue) 1585 * 4. Other types of event structures can be delivered back to the port cache 1586 * (port_free_event_local()). 1587 * 5. The event source callback function is the last opportunity for the 1588 * event source to update events, to free local resources associated with 1589 * the event or to deny the delivery of the event. 
1590 */ 1591 static int 1592 port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list) 1593 { 1594 int free_event = 0; 1595 int flags; 1596 int error; 1597 1598 puevp->portev_source = pkevp->portkev_source; 1599 puevp->portev_object = pkevp->portkev_object; 1600 puevp->portev_user = pkevp->portkev_user; 1601 puevp->portev_events = pkevp->portkev_events; 1602 1603 /* remove event from the queue */ 1604 list_remove(list, pkevp); 1605 1606 /* 1607 * Events of type PORT_KEV_WIRED remain allocated by the 1608 * event source. 1609 */ 1610 flags = pkevp->portkev_flags; 1611 if (pkevp->portkev_flags & PORT_KEV_WIRED) 1612 pkevp->portkev_flags &= ~PORT_KEV_DONEQ; 1613 else 1614 free_event = 1; 1615 1616 if (pkevp->portkev_callback) { 1617 error = (*pkevp->portkev_callback)(pkevp->portkev_arg, 1618 &puevp->portev_events, pkevp->portkev_pid, 1619 PORT_CALLBACK_DEFAULT, pkevp); 1620 1621 if (error) { 1622 /* 1623 * Event can not be delivered. 1624 * Caller must reinsert the event into the queue. 1625 */ 1626 pkevp->portkev_flags = flags; 1627 return (error); 1628 } 1629 } 1630 if (free_event) 1631 port_free_event_local(pkevp, 0); 1632 return (0); 1633 } 1634 1635 #ifdef _SYSCALL32_IMPL 1636 /* 1637 * 1. copy kernel event structure to user event structure. 1638 * 2. PORT_KEV_WIRED event structures will be reused by the "source" 1639 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue) 1640 * 4. Other types of event structures can be delivered back to the port cache 1641 * (port_free_event_local()). 1642 * 5. The event source callback function is the last opportunity for the 1643 * event source to update events, to free local resources associated with 1644 * the event or to deny the delivery of the event. 
1645 */ 1646 static int 1647 port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list) 1648 { 1649 int free_event = 0; 1650 int error; 1651 int flags; 1652 1653 puevp->portev_source = pkevp->portkev_source; 1654 puevp->portev_object = (daddr32_t)pkevp->portkev_object; 1655 puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user; 1656 puevp->portev_events = pkevp->portkev_events; 1657 1658 /* remove event from the queue */ 1659 list_remove(list, pkevp); 1660 1661 /* 1662 * Events if type PORT_KEV_WIRED remain allocated by the 1663 * sub-system (source). 1664 */ 1665 1666 flags = pkevp->portkev_flags; 1667 if (pkevp->portkev_flags & PORT_KEV_WIRED) 1668 pkevp->portkev_flags &= ~PORT_KEV_DONEQ; 1669 else 1670 free_event = 1; 1671 1672 if (pkevp->portkev_callback != NULL) { 1673 error = (*pkevp->portkev_callback)(pkevp->portkev_arg, 1674 &puevp->portev_events, pkevp->portkev_pid, 1675 PORT_CALLBACK_DEFAULT, pkevp); 1676 if (error) { 1677 /* 1678 * Event can not be delivered. 1679 * Caller must reinsert the event into the queue. 1680 */ 1681 pkevp->portkev_flags = flags; 1682 return (error); 1683 } 1684 } 1685 if (free_event) 1686 port_free_event_local(pkevp, 0); 1687 return (0); 1688 } 1689 #endif /* _SYSCALL32_IMPL */ 1690 1691 /* 1692 * copyout alert event. 
1693 */ 1694 static int 1695 port_get_alert(port_alert_t *pa, port_event_t *uevp) 1696 { 1697 model_t model = get_udatamodel(); 1698 1699 /* copyout alert event structures to user space */ 1700 if (model == DATAMODEL_NATIVE) { 1701 port_event_t uev; 1702 uev.portev_source = PORT_SOURCE_ALERT; 1703 uev.portev_object = pa->portal_object; 1704 uev.portev_events = pa->portal_events; 1705 uev.portev_user = pa->portal_user; 1706 if (copyout(&uev, uevp, sizeof (port_event_t))) 1707 return (EFAULT); 1708 #ifdef _SYSCALL32_IMPL 1709 } else { 1710 port_event32_t uev32; 1711 uev32.portev_source = PORT_SOURCE_ALERT; 1712 uev32.portev_object = (daddr32_t)pa->portal_object; 1713 uev32.portev_events = pa->portal_events; 1714 uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user; 1715 if (copyout(&uev32, uevp, sizeof (port_event32_t))) 1716 return (EFAULT); 1717 #endif /* _SYSCALL32_IMPL */ 1718 } 1719 return (0); 1720 } 1721 1722 /* 1723 * Check return conditions : 1724 * - pending port close(2) 1725 * - threads waiting for events 1726 */ 1727 static void 1728 port_check_return_cond(port_queue_t *portq) 1729 { 1730 ASSERT(MUTEX_HELD(&portq->portq_mutex)); 1731 portq->portq_thrcnt--; 1732 if (portq->portq_flags & PORTQ_CLOSE) { 1733 if (portq->portq_thrcnt == 0) 1734 cv_signal(&portq->portq_closecv); 1735 else 1736 cv_signal(&portq->portq_thread->portget_cv); 1737 } 1738 } 1739 1740 /* 1741 * The port_get_kevent() function returns 1742 * - the event located at the head of the queue if 'last' pointer is NULL 1743 * - the next event after the event pointed by 'last' 1744 * The caller of this function is responsible for the integrity of the queue 1745 * in use: 1746 * - port_getn() is using a temporary queue protected with port_block(). 1747 * - port_close_events() is working on the global event queue and protects 1748 * the queue with portq->portq_mutex. 
1749 */ 1750 port_kevent_t * 1751 port_get_kevent(list_t *list, port_kevent_t *last) 1752 { 1753 if (last == NULL) 1754 return (list_head(list)); 1755 else 1756 return (list_next(list, last)); 1757 } 1758 1759 /* 1760 * The port_get_timeout() function gets the timeout data from user space 1761 * and converts that info into a corresponding internal representation. 1762 * The kerneldata flag means that the timeout data is already loaded. 1763 */ 1764 static int 1765 port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp, 1766 int *blocking, int kerneldata) 1767 { 1768 model_t model = get_udatamodel(); 1769 1770 *rqtp = NULL; 1771 if (timeout == NULL) { 1772 *blocking = 1; 1773 return (0); 1774 } 1775 1776 if (kerneldata) { 1777 *rqtime = *timeout; 1778 } else { 1779 if (model == DATAMODEL_NATIVE) { 1780 if (copyin(timeout, rqtime, sizeof (*rqtime))) 1781 return (EFAULT); 1782 #ifdef _SYSCALL32_IMPL 1783 } else { 1784 timespec32_t wait_time_32; 1785 if (copyin(timeout, &wait_time_32, 1786 sizeof (wait_time_32))) 1787 return (EFAULT); 1788 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32); 1789 #endif /* _SYSCALL32_IMPL */ 1790 } 1791 } 1792 1793 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) { 1794 *blocking = 0; 1795 return (0); 1796 } 1797 1798 if (rqtime->tv_sec < 0 || 1799 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC) 1800 return (EINVAL); 1801 1802 *rqtp = rqtime; 1803 *blocking = 1; 1804 return (0); 1805 } 1806 1807 /* 1808 * port_queue_thread() 1809 * Threads requiring more events than available will be put in a wait queue. 1810 * There is a "thread wait queue" per port. 1811 * Threads requiring less events get a higher priority than others and they 1812 * will be awoken first. 
1813 */ 1814 static portget_t * 1815 port_queue_thread(port_queue_t *portq, uint_t nget) 1816 { 1817 portget_t *pgetp; 1818 portget_t *ttp; 1819 portget_t *htp; 1820 1821 pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP); 1822 pgetp->portget_nget = nget; 1823 pgetp->portget_pid = curproc->p_pid; 1824 if (portq->portq_thread == NULL) { 1825 /* first waiting thread */ 1826 portq->portq_thread = pgetp; 1827 portq->portq_nget = nget; 1828 pgetp->portget_prev = pgetp; 1829 pgetp->portget_next = pgetp; 1830 return (pgetp); 1831 } 1832 1833 /* 1834 * thread waiting for less events will be set on top of the queue. 1835 */ 1836 ttp = portq->portq_thread; 1837 htp = ttp; 1838 for (;;) { 1839 if (nget <= ttp->portget_nget) 1840 break; 1841 if (htp == ttp->portget_next) 1842 break; /* last event */ 1843 ttp = ttp->portget_next; 1844 } 1845 1846 /* add thread to the queue */ 1847 pgetp->portget_next = ttp; 1848 pgetp->portget_prev = ttp->portget_prev; 1849 ttp->portget_prev->portget_next = pgetp; 1850 ttp->portget_prev = pgetp; 1851 if (portq->portq_thread == ttp) 1852 portq->portq_thread = pgetp; 1853 portq->portq_nget = portq->portq_thread->portget_nget; 1854 return (pgetp); 1855 } 1856 1857 /* 1858 * Take thread out of the queue. 1859 */ 1860 static void 1861 port_dequeue_thread(port_queue_t *portq, portget_t *pgetp) 1862 { 1863 if (pgetp->portget_next == pgetp) { 1864 /* last (single) waiting thread */ 1865 portq->portq_thread = NULL; 1866 portq->portq_nget = 0; 1867 } else { 1868 pgetp->portget_prev->portget_next = pgetp->portget_next; 1869 pgetp->portget_next->portget_prev = pgetp->portget_prev; 1870 if (portq->portq_thread == pgetp) 1871 portq->portq_thread = pgetp->portget_next; 1872 portq->portq_nget = portq->portq_thread->portget_nget; 1873 } 1874 kmem_free(pgetp, sizeof (portget_t)); 1875 } 1876 1877 /* 1878 * Set up event port kstats. 
1879 */ 1880 static void 1881 port_kstat_init() 1882 { 1883 kstat_t *ksp; 1884 uint_t ndata; 1885 1886 ndata = sizeof (port_kstat) / sizeof (kstat_named_t); 1887 ksp = kstat_create("portfs", 0, "Event Ports", "misc", 1888 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL); 1889 if (ksp) { 1890 ksp->ks_data = &port_kstat; 1891 kstat_install(ksp); 1892 } 1893 } 1894