1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright (c) 2015 Joyent, Inc. All rights reserved. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/systm.h> 33 #include <sys/cred.h> 34 #include <sys/modctl.h> 35 #include <sys/vfs.h> 36 #include <sys/vfs_opreg.h> 37 #include <sys/sysmacros.h> 38 #include <sys/cmn_err.h> 39 #include <sys/stat.h> 40 #include <sys/errno.h> 41 #include <sys/kmem.h> 42 #include <sys/file.h> 43 #include <sys/kstat.h> 44 #include <sys/port_impl.h> 45 #include <sys/task.h> 46 #include <sys/project.h> 47 48 /* 49 * Event Ports can be shared across threads or across processes. 50 * Every thread/process can use an own event port or a group of them 51 * can use a single port. A major request was also to get the ability 52 * to submit user-defined events to a port. The idea of the 53 * user-defined events is to use the event ports for communication between 54 * threads/processes (like message queues). User defined-events are queued 55 * in a port with the same priority as other event types. 
56 * 57 * Events are delivered only once. The thread/process which is waiting 58 * for events with the "highest priority" (priority here is related to the 59 * internal strategy to wakeup waiting threads) will retrieve the event, 60 * all other threads/processes will not be notified. There is also 61 * the requirement to have events which should be submitted immediately 62 * to all "waiting" threads. That is the main task of the alert event. 63 * The alert event is submitted by the application to a port. The port 64 * changes from a standard mode to the alert mode. Now all waiting threads 65 * will be awakened immediately and they will return with the alert event. 66 * Threads trying to retrieve events from a port in alert mode will 67 * return immediately with the alert event. 68 * 69 * 70 * An event port is like a kernel queue, which accepts events submitted from 71 * user level as well as events submitted from kernel sub-systems. Sub-systems 72 * able to submit events to a port are the so-called "event sources". 73 * Current event sources: 74 * PORT_SOURCE_AIO : events submitted per transaction completion from 75 * POSIX-I/O framework. 76 * PORT_SOURCE_TIMER : events submitted when a timer fires 77 * (see timer_create(3RT)). 78 * PORT_SOURCE_FD : events submitted per file descriptor (see poll(2)). 79 * PORT_SOURCE_ALERT : events submitted from user. This is not really a 80 * single event, this is actually a port mode 81 * (see port_alert(3c)). 82 * PORT_SOURCE_USER : events submitted by applications with 83 * port_send(3c) or port_sendn(3c). 84 * PORT_SOURCE_FILE : events submitted per file being watched for file 85 * change events (see port_create(3c)). 86 * 87 * There is a user API implemented in the libc library as well as a 88 * kernel API implemented in port_subr.c in genunix. 89 * The available user API functions are: 90 * port_create() : create a port as a file descriptor of portfs file system 91 * The standard close(2) function closes a port. 
92 * port_associate() : associate a file descriptor with a port to be able to 93 * retrieve events from that file descriptor. 94 * port_dissociate(): remove the association of a file descriptor with a port. 95 * port_alert() : set/unset a port in alert mode 96 * port_send() : send an event of type PORT_SOURCE_USER to a port 97 * port_sendn() : send an event of type PORT_SOURCE_USER to a list of ports 98 * port_get() : retrieve a single event from a port 99 * port_getn() : retrieve a list of events from a port 100 * 101 * The available kernel API functions are: 102 * port_allocate_event(): allocate an event slot/structure of/from a port 103 * port_init_event() : set event data in the event structure 104 * port_send_event() : send event to a port 105 * port_free_event() : deliver allocated slot/structure back to a port 106 * port_associate_ksource(): associate a kernel event source with a port 107 * port_dissociate_ksource(): dissociate a kernel event source from a port 108 * 109 * The libc implementation consists of small functions which pass the 110 * arguments to the kernel using the "portfs" system call. It means, all the 111 * synchronisation work is being done in the kernel. The "portfs" system 112 * call loads the portfs file system into the kernel. 113 * 114 * PORT CREATION 115 * The first function to be used is port_create() which internally creates 116 * a vnode and a portfs node. The portfs node is represented by the port_t 117 * structure, which again includes all the data necessary to control a port. 118 * port_create() returns a file descriptor, which needs to be used in almost 119 * all other event port functions. 120 * The maximum number of ports per system is controlled by the resource 121 * control: project:port-max-ids. 122 * 123 * EVENT GENERATION 124 * The second step is the triggering of events, which could be sent to a port. 
125 * Every event source implements an own method to generate events for a port: 126 * PORT_SOURCE_AIO: 127 * The sigevent structure of the standard POSIX-IO functions 128 * was extended by an additional notification type. 129 * Standard notification types: 130 * SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD 131 * Event ports introduced now SIGEV_PORT. 132 * The notification type SIGEV_PORT specifies that a structure 133 * of type port_notify_t has to be attached to the sigev_value. 134 * The port_notify_t structure contains the event port file 135 * descriptor and a user-defined pointer. 136 * Internally the AIO implementation will use the kernel API 137 * functions to allocate an event port slot per transaction (aiocb) 138 * and send the event to the port as soon as the transaction completes. 139 * All the events submitted per transaction are of type 140 * PORT_SOURCE_AIO. 141 * PORT_SOURCE_TIMER: 142 * The timer_create() function uses the same method as the 143 * PORT_SOURCE_AIO event source. It also uses the sigevent structure 144 * to deliver the port information. 145 * Internally the timer code will allocate a single event slot/struct 146 * per timer and it will send the timer event as soon as the timer 147 * fires. If the timer-fired event is not delivered to the application 148 * before the next period elapsed, then an overrun counter will be 149 * incremented. The timer event source uses a callback function to 150 * detect the delivery of the event to the application. At that time 151 * the timer callback function will update the event overrun counter. 152 * PORT_SOURCE_FD: 153 * This event source uses the port_associate() function to allocate 154 * an event slot/struct from a port. The application defines in the 155 * events argument of port_associate() the type of events which it is 156 * interested in. 
157 * The internal pollwakeup() function is used by all the file 158 * systems --which are supporting the VOP_POLL() interface- to notify 159 * the upper layer (poll(2), devpoll(4D) and now event ports) about 160 * the event triggered (see valid events in poll(2)). 161 * The pollwakeup() function forwards the event to the layer registered 162 * to receive the current event. 163 * The port_dissociate() function can be used to free the allocated 164 * event slot from the port. Anyway, file descriptors deliver events 165 * only one time and remain deactivated until the application 166 * reactivates the association of a file descriptor with port_associate(). 167 * If an associated file descriptor is closed then the file descriptor 168 * will be dissociated automatically from the port. 169 * 170 * PORT_SOURCE_ALERT: 171 * This event type is generated when the port was previously set in 172 * alert mode using the port_alert() function. 173 * A single alert event is delivered to every thread which tries to 174 * retrieve events from a port. 175 * PORT_SOURCE_USER: 176 * This type of event is generated from user level using the port_send() 177 * function to send a user event to a port or the port_sendn() function 178 * to send an event to a list of ports. 179 * PORT_SOURCE_FILE: 180 * This event source uses the port_associate() interface to register 181 * a file to be monitored for changes. The file name that needs to be 182 * monitored is specified in the file_obj_t structure, a pointer to which 183 * is passed as an argument. The event types to be monitored are specified 184 * in the events argument. 185 * A file events monitor is represented internal per port per object 186 * address(the file_obj_t pointer). Which means there can be multiple 187 * watches registered on the same file using different file_obj_t 188 * structure pointer. With the help of the FEM(File Event Monitoring) 189 * hooks, the file's vnode ops are intercepted and relevant events 190 * delivered. 
The port_dissociate() function is used to de-register a 191 * file events monitor on a file. When the specified file is 192 * removed/renamed, the file events watch/monitor is automatically 193 * removed. 194 * 195 * EVENT DELIVERY / RETRIEVING EVENTS 196 * Events remain in the port queue until: 197 * - the application uses port_get() or port_getn() to retrieve events, 198 * - the event source cancel the event, 199 * - the event port is closed or 200 * - the process exits. 201 * The maximal number of events in a port queue is the maximal number 202 * of event slots/structures which can be allocated by event sources. 203 * The allocation of event slots/structures is controlled by the resource 204 * control: process.port-max-events. 205 * The port_get() function retrieves a single event and the port_getn() 206 * function retrieves a list of events. 207 * Events are classified as shareable and non-shareable events across processes. 208 * Non-shareable events are invisible for the port_get(n)() functions of 209 * processes other than the owner of the event. 210 * Shareable event types are: 211 * PORT_SOURCE_USER events 212 * This type of event is unconditionally shareable and without 213 * limitations. If the parent process sends a user event and closes 214 * the port afterwards, the event remains in the port and the child 215 * process will still be able to retrieve the user event. 216 * PORT_SOURCE_ALERT events 217 * This type of event is shareable between processes. 218 * Limitation: The alert mode of the port is removed if the owner 219 * (process which set the port in alert mode) of the 220 * alert event closes the port. 221 * PORT_SOURCE_FD events 222 * This type of event is conditional shareable between processes. 223 * After fork(2) all forked file descriptors are shareable between 224 * the processes. The child process is allowed to retrieve events 225 * from the associated file descriptors and it can also re-associate 226 * the fd with the port. 
227 * Limitations: The child process is not allowed to dissociate 228 * the file descriptor from the port. Only the 229 * owner (process) of the association is allowed to 230 * dissociate the file descriptor from the port. 231 * If the owner of the association closes the port 232 * the association will be removed. 233 * PORT_SOURCE_AIO events 234 * This type of event is not shareable between processes. 235 * PORT_SOURCE_TIMER events 236 * This type of event is not shareable between processes. 237 * PORT_SOURCE_FILE events 238 * This type of event is not shareable between processes. 239 * 240 * FORK BEHAVIOUR 241 * On fork(2) the child process inherits all opened file descriptors from 242 * the parent process. This is also valid for port file descriptors. 243 * Associated file descriptors with a port maintain the association across the 244 * fork(2). It means, the child process gets full access to the port and 245 * it can retrieve events from all common associated file descriptors. 246 * Events of file descriptors created and associated with a port after the 247 * fork(2) are non-shareable and can only be retrieved by the same process. 248 * 249 * If the parent or the child process closes an exported port (using fork(2) 250 * or I_SENDFD) all the file descriptors associated with the port by the 251 * process will be dissociated from the port. Events of dissociated file 252 * descriptors as well as all non-shareable events will be discarded. 253 * The other process can continue working with the port as usual. 254 * 255 * CLOSING A PORT 256 * close(2) has to be used to close a port. See FORK BEHAVIOUR for details. 257 * 258 * PORT EVENT STRUCTURES 259 * The global control structure of the event ports framework is port_control_t. 260 * port_control_t keeps track of the number of created ports in the system. 261 * The cache of the port event structures is also located in port_control_t. 262 * 263 * On port_create() the vnode and the portfs node is also created. 
264 * The portfs node is represented by the port_t structure. 265 * The port_t structure manages all port specific tasks: 266 * - management of resource control values 267 * - port VOP_POLL interface 268 * - creation time 269 * - uid and gid of the port 270 * 271 * The port_t structure contains the port_queue_t structure. 272 * The port_queue_t structure contains all the data necessary for the 273 * queue management: 274 * - locking 275 * - condition variables 276 * - event counters 277 * - submitted events (represented by port_kevent_t structures) 278 * - threads waiting for event delivery (check portget_t structure) 279 * - PORT_SOURCE_FD cache (managed by the port_fdcache_t structure) 280 * - event source management (managed by the port_source_t structure) 281 * - alert mode management (check port_alert_t structure) 282 * 283 * EVENT MANAGEMENT 284 * The event port file system creates a kmem_cache for internal allocation of 285 * event port structures. 286 * 287 * 1. Event source association with a port: 288 * The first step to do for event sources is to get associated with a port 289 * using the port_associate_ksource() function or adding an entry to the 290 * port_ksource_tab[]. An event source can get dissociated from a port 291 * using the port_dissociate_ksource() function. An entry in the 292 * port_ksource_tab[] implies that the source will be associated 293 * automatically with every new created port. 294 * The event source can deliver a callback function, which is used by the 295 * port to notify the event source about close(2). The idea is that 296 * in such a case the event source should free all allocated resources 297 * and it must return to the port all allocated slots/structures. 298 * The port_close() function will wait until all allocated event 299 * structures/slots are returned to the port. 
300 * The callback function is not necessary when the event source does not 301 * maintain local resources, a second condition is that the event source 302 * can guarantee that allocated event slots will be returned without 303 * delay to the port (it will not block and sleep somewhere). 304 * 305 * 2. Reservation of an event slot / event structure 306 * The event port reliability is based on the reservation of an event "slot" 307 * (allocation of an event structure) by the event source as part of the 308 * application call. If the maximal number of event slots is exhausted then 309 * the event source can return a corresponding error code to the application. 310 * 311 * The port_alloc_event() function has to be used by event sources to 312 * allocate an event slot (reserve an event structure). The port_alloc_event() 313 * does not block and it will return a 0 value on success or an error code 314 * if it fails. 315 * An argument of port_alloc_event() is a flag which determines the behavior 316 * of the event after it was delivered to the application: 317 * PORT_ALLOC_DEFAULT : event slot becomes free after delivery to the 318 * application. 319 * PORT_ALLOC_PRIVATE : event slot remains under the control of the event 320 * source. This kind of slots can not be used for 321 * event delivery and should only be used internally 322 * by the event source. 323 * PORT_KEV_CACHED : event slot remains under the control of an event 324 * port cache. It does not become free after delivery 325 * to the application. 326 * PORT_ALLOC_SCACHED : event slot remains under the control of the event 327 * source. The event source takes the control over 328 * the slot after the event is delivered to the 329 * application. 330 * 331 * 3. Delivery of events to the event port 332 * Earlier allocated event structure/slot has to be used to deliver 333 * event data to the port. Event source has to use the function 334 * port_send_event(). 
The single argument is a pointer to the previously 335 * reserved event structure/slot. 336 * The portkev_events field of the port_kevent_t structure can be updated/set 337 * in two ways: 338 * 1. using the port_set_event() function, or 339 * 2. updating the portkev_events field out of the callback function: 340 * The event source can deliver a callback function to the port as an 341 * argument of port_init_event(). 342 * One of the arguments of the callback function is a pointer to the 343 * events field, which will be delivered to the application. 344 * (see Delivery of events to the application). 345 * Event structures/slots can be delivered to the event port only one time, 346 * they remain blocked until the data is delivered to the application and the 347 * slot becomes free or it is delivered back to the event source 348 * (PORT_ALLOC_SCACHED). The activation of the callback function mentioned above 349 * is at the same time the indicator for the event source that the event 350 * structure/slot is free for reuse. 351 * 352 * 4. Delivery of events to the application 353 * The event structures/slots delivered by event sources remain in the 354 * port queue until they are retrieved by the application or the port 355 * is closed (exit(2) also closes all opened file descriptors). 356 * The application uses port_get() or port_getn() to retrieve events from 357 * a port. port_get() retrieves a single event structure/slot and port_getn() 358 * retrieves a list of event structures/slots. 359 * Both functions are able to poll for events and return immediately or they 360 * can specify a timeout value. 361 * Before the events are delivered to the application they are moved to a 362 * second temporary internal queue. The idea is to avoid lock collisions or 363 * contentions of the global queue lock. 364 * The global queue lock is used every time when an event source delivers 365 * new events to the port. 
366 * The port_get() and port_getn() functions 367 * a) retrieve single events from the temporary queue, 368 * b) prepare the data to be passed to the application memory, 369 * c) activate the callback function of the event sources: 370 * - to get the latest event data, 371 * - the event source can free all allocated resources associated with the 372 * current event, 373 * - the event source can re-use the current event slot/structure 374 * - the event source can deny the delivery of the event to the application 375 * (e.g. because of the wrong process). 376 * d) put the event back to the temporary queue if the event delivery was denied 377 * e) repeat a) until d) as long as there are events in the queue and 378 * there is enough user space available. 379 * 380 * The loop described above could block for a very long time the global mutex, 381 * to avoid that a second mutex was introduced to synchronized concurrent 382 * threads accessing the temporary queue. 383 */ 384 385 static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, 386 uintptr_t); 387 388 static struct sysent port_sysent = { 389 6, 390 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 391 (int (*)())(uintptr_t)portfs, 392 }; 393 394 static struct modlsys modlsys = { 395 &mod_syscallops, "event ports", &port_sysent 396 }; 397 398 #ifdef _SYSCALL32_IMPL 399 400 static int64_t 401 portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4, 402 uint32_t arg5, uint32_t arg6); 403 404 static struct sysent port_sysent32 = { 405 6, 406 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 407 (int (*)())(uintptr_t)portfs32, 408 }; 409 410 static struct modlsys modlsys32 = { 411 &mod_syscallops32, 412 "32-bit event ports syscalls", 413 &port_sysent32 414 }; 415 #endif /* _SYSCALL32_IMPL */ 416 417 static struct modlinkage modlinkage = { 418 MODREV_1, 419 &modlsys, 420 #ifdef _SYSCALL32_IMPL 421 &modlsys32, 422 #endif 423 NULL 424 }; 425 426 port_kstat_t port_kstat = { 427 { "ports", KSTAT_DATA_UINT32 } 428 }; 429 430 dev_t 
portdev; 431 struct vnodeops *port_vnodeops; 432 struct vfs port_vfs; 433 434 extern rctl_hndl_t rc_process_portev; 435 extern rctl_hndl_t rc_project_portids; 436 extern void aio_close_port(void *, int, pid_t, int); 437 438 /* 439 * This table contains a list of event sources which need a static 440 * association with a port (every port). 441 * The last NULL entry in the table is required to detect "end of table". 442 */ 443 struct port_ksource port_ksource_tab[] = { 444 {PORT_SOURCE_AIO, aio_close_port, NULL, NULL}, 445 {0, NULL, NULL, NULL} 446 }; 447 448 /* local functions */ 449 static int port_getn(port_t *, port_event_t *, uint_t, uint_t *, 450 port_gettimer_t *); 451 static int port_sendn(int [], int [], uint_t, int, void *, uint_t *); 452 static int port_alert(port_t *, int, int, void *); 453 static int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *); 454 static int port_send(port_t *, int, int, void *); 455 static int port_create(int *); 456 static int port_get_alert(port_alert_t *, port_event_t *); 457 static int port_copy_event(port_event_t *, port_kevent_t *, list_t *); 458 static int *port_errorn(int *, int, int, int); 459 static int port_noshare(void *, int *, pid_t, int, void *); 460 static int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *, 461 int); 462 static void port_init(port_t *); 463 static void port_remove_alert(port_queue_t *); 464 static void port_add_ksource_local(port_t *, port_ksource_t *); 465 static void port_check_return_cond(port_queue_t *); 466 static void port_dequeue_thread(port_queue_t *, portget_t *); 467 static portget_t *port_queue_thread(port_queue_t *, uint_t); 468 static void port_kstat_init(void); 469 470 #ifdef _SYSCALL32_IMPL 471 static int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *); 472 #endif 473 474 int 475 _init(void) 476 { 477 static const fs_operation_def_t port_vfsops_template[] = { 478 NULL, NULL 479 }; 480 extern const fs_operation_def_t 
port_vnodeops_template[]; 481 vfsops_t *port_vfsops; 482 int error; 483 major_t major; 484 485 if ((major = getudev()) == (major_t)-1) 486 return (ENXIO); 487 portdev = makedevice(major, 0); 488 489 /* Create a dummy vfs */ 490 error = vfs_makefsops(port_vfsops_template, &port_vfsops); 491 if (error) { 492 cmn_err(CE_WARN, "port init: bad vfs ops"); 493 return (error); 494 } 495 vfs_setops(&port_vfs, port_vfsops); 496 port_vfs.vfs_flag = VFS_RDONLY; 497 port_vfs.vfs_dev = portdev; 498 vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0); 499 500 error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops); 501 if (error) { 502 vfs_freevfsops(port_vfsops); 503 cmn_err(CE_WARN, "port init: bad vnode ops"); 504 return (error); 505 } 506 507 mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL); 508 port_control.pc_nents = 0; /* number of active ports */ 509 510 /* create kmem_cache for port event structures */ 511 port_control.pc_cache = kmem_cache_create("port_cache", 512 sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 513 514 port_kstat_init(); /* init port kstats */ 515 return (mod_install(&modlinkage)); 516 } 517 518 int 519 _info(struct modinfo *modinfop) 520 { 521 return (mod_info(&modlinkage, modinfop)); 522 } 523 524 /* 525 * System call wrapper for all port related system calls from 32-bit programs. 526 */ 527 #ifdef _SYSCALL32_IMPL 528 static int64_t 529 portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3, 530 uint32_t a4) 531 { 532 int64_t error; 533 534 switch (opcode & PORT_CODE_MASK) { 535 case PORT_GET: 536 error = portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4); 537 break; 538 case PORT_SENDN: 539 error = portfs(opcode, (uint32_t)a0, a1, a2, a3, a4); 540 break; 541 default: 542 error = portfs(opcode, a0, a1, a2, a3, a4); 543 break; 544 } 545 return (error); 546 } 547 #endif /* _SYSCALL32_IMPL */ 548 549 /* 550 * System entry point for port functions. 
551 * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE). 552 * The libc uses PORT_SYS_NOPORT in functions which do not deliver a 553 * port file descriptor as first argument. 554 */ 555 static int64_t 556 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, 557 uintptr_t a4) 558 { 559 rval_t r; 560 port_t *pp; 561 int error = 0; 562 uint_t nget; 563 file_t *fp; 564 port_gettimer_t port_timer; 565 566 r.r_vals = 0; 567 if (opcode & PORT_SYS_NOPORT) { 568 opcode &= PORT_CODE_MASK; 569 if (opcode == PORT_SENDN) { 570 error = port_sendn((int *)a0, (int *)a1, (uint_t)a2, 571 (int)a3, (void *)a4, (uint_t *)&r.r_val1); 572 if (error && (error != EIO)) 573 return ((int64_t)set_errno(error)); 574 return (r.r_vals); 575 } 576 577 if (opcode == PORT_CREATE) { 578 error = port_create(&r.r_val1); 579 if (error) 580 return ((int64_t)set_errno(error)); 581 return (r.r_vals); 582 } 583 } 584 585 /* opcodes using port as first argument (a0) */ 586 587 if ((fp = getf((int)a0)) == NULL) 588 return ((uintptr_t)set_errno(EBADF)); 589 590 if (fp->f_vnode->v_type != VPORT) { 591 releasef((int)a0); 592 return ((uintptr_t)set_errno(EBADFD)); 593 } 594 595 pp = VTOEP(fp->f_vnode); 596 597 switch (opcode & PORT_CODE_MASK) { 598 case PORT_GET: 599 { 600 /* see PORT_GETN description */ 601 struct timespec timeout; 602 603 port_timer.pgt_flags = PORTGET_ONE; 604 port_timer.pgt_loop = 0; 605 port_timer.pgt_rqtp = NULL; 606 if (a4 != 0) { 607 port_timer.pgt_timeout = &timeout; 608 timeout.tv_sec = (time_t)a2; 609 timeout.tv_nsec = (long)a3; 610 } else { 611 port_timer.pgt_timeout = NULL; 612 } 613 do { 614 nget = 1; 615 error = port_getn(pp, (port_event_t *)a1, 1, 616 (uint_t *)&nget, &port_timer); 617 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 618 break; 619 } 620 case PORT_GETN: 621 { 622 /* 623 * port_getn() can only retrieve own or shareable events from 624 * other processes. 
The port_getn() function remains in the 625 * kernel until own or shareable events are available or the 626 * timeout elapses. 627 */ 628 port_timer.pgt_flags = 0; 629 port_timer.pgt_loop = 0; 630 port_timer.pgt_rqtp = NULL; 631 port_timer.pgt_timeout = (struct timespec *)a4; 632 do { 633 nget = a3; 634 error = port_getn(pp, (port_event_t *)a1, (uint_t)a2, 635 (uint_t *)&nget, &port_timer); 636 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 637 r.r_val1 = nget; 638 r.r_val2 = error; 639 releasef((int)a0); 640 if (error && error != ETIME) 641 return ((int64_t)set_errno(error)); 642 return (r.r_vals); 643 } 644 case PORT_ASSOCIATE: 645 { 646 switch ((int)a1) { 647 case PORT_SOURCE_FD: 648 error = port_associate_fd(pp, (int)a1, (uintptr_t)a2, 649 (int)a3, (void *)a4); 650 break; 651 case PORT_SOURCE_FILE: 652 error = port_associate_fop(pp, (int)a1, (uintptr_t)a2, 653 (int)a3, (void *)a4); 654 break; 655 default: 656 error = EINVAL; 657 break; 658 } 659 break; 660 } 661 case PORT_SEND: 662 { 663 /* user-defined events */ 664 error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2); 665 break; 666 } 667 case PORT_DISPATCH: 668 { 669 /* 670 * library events, blocking 671 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ 672 * are currently allowed. 
		 */
		if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) {
			error = EINVAL;
			break;
		}
		error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2,
		    (uintptr_t)a3, (void *)a4);
		break;
	}
	case PORT_DISSOCIATE:
	{
		switch ((int)a1) {
		case PORT_SOURCE_FD:
			error = port_dissociate_fd(pp, (uintptr_t)a2);
			break;
		case PORT_SOURCE_FILE:
			error = port_dissociate_fop(pp, (uintptr_t)a2);
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}
	case PORT_ALERT:
	{
		if ((int)a2)	/* a2 = events */
			error = port_alert(pp, (int)a1, (int)a2, (void *)a3);
		else
			port_remove_alert(&pp->port_queue);
		break;
	}
	default:
		error = EINVAL;
		break;
	}

	releasef((int)a0);
	if (error)
		return ((int64_t)set_errno(error));
	return (r.r_vals);
}

/*
 * System call to create a port.
 *
 * The port_create() function creates a vnode of type VPORT per port.
 * The port control data is associated with the vnode as vnode private data.
 * The port_create() function returns an event port file descriptor.
 *
 * On success 0 is returned and the new file descriptor is stored in *fdp.
 * EAGAIN is returned when the project.port-max-ids resource control denies
 * one more port, EMFILE when falloc() fails. In both failure cases the
 * vnode and the port_t allocated here are freed again.
 */
static int
port_create(int *fdp)
{
	port_t		*pp;
	vnode_t		*vp;
	struct file	*fp;
	proc_t		*p = curproc;

	/* initialize vnode and port private data */
	pp = kmem_zalloc(sizeof (port_t), KM_SLEEP);

	pp->port_vnode = vn_alloc(KM_SLEEP);
	vp = EPTOV(pp);
	vn_setops(vp, port_vnodeops);
	vp->v_type = VPORT;
	vp->v_vfsp = &port_vfs;
	vp->v_data = (caddr_t)pp;

	/*
	 * pc_mutex stays held from the resource control test until the
	 * global port counter has been updated below.
	 */
	mutex_enter(&port_control.pc_mutex);
	/*
	 * Retrieve the maximal number of event ports allowed per system from
	 * the resource control: project.port-max-ids.
	 */
	mutex_enter(&p->p_lock);
	if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p,
	    port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) {
		mutex_exit(&p->p_lock);
		vn_free(vp);
		kmem_free(pp, sizeof (port_t));
		mutex_exit(&port_control.pc_mutex);
		return (EAGAIN);
	}

	/*
	 * Retrieve the maximal number of events allowed per port from
	 * the resource control: process.port-max-events.
	 */
	pp->port_max_events = rctl_enforced_value(rc_process_portev,
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/* allocate a new user file descriptor and a file structure */
	if (falloc(vp, 0, &fp, fdp)) {
		/*
		 * If the file table is full, free allocated resources.
		 */
		vn_free(vp);
		kmem_free(pp, sizeof (port_t));
		mutex_exit(&port_control.pc_mutex);
		return (EMFILE);
	}

	/*
	 * NOTE(review): falloc() appears to return with fp->f_tlock held;
	 * it is dropped here -- confirm against falloc().
	 */
	mutex_exit(&fp->f_tlock);

	/* account for the new port: global count, per-process count, kstat */
	pp->port_fd = *fdp;
	port_control.pc_nents++;
	p->p_portcnt++;
	port_kstat.pks_ports.value.ui32++;
	mutex_exit(&port_control.pc_mutex);

	/* initializes port private data */
	port_init(pp);
	/* set user file pointer */
	setf(*fdp, fp);
	return (0);
}

/*
 * port_init() initializes event port specific data
 *
 * It sets up the port and queue mutexes, the two event lists, the fd cache
 * skeleton and the source cache, records the creating process (uid, gid,
 * pid, creation time) and pre-associates every entry of port_ksource_tab[]
 * with the port.
 */
static void
port_init(port_t *pp)
{
	port_queue_t	*portq;
	port_ksource_t	*pks;

	mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL);
	portq = &pp->port_queue;
	mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL);
	pp->port_flags |= PORT_INIT;

	/*
	 * If it is not enough memory available to satisfy a user
	 * request using a single port_getn() call then port_getn()
	 * will reduce the size of the list to PORT_MAX_LIST.
	 */
	pp->port_max_list = port_max_list;

	/* Set timestamp entries required for fstat(2) requests */
	gethrestime(&pp->port_ctime);
	pp->port_uid = crgetuid(curproc->p_cred);
	pp->port_gid = crgetgid(curproc->p_cred);

	/* initialize port queue structs */
	list_create(&portq->portq_list, sizeof (port_kevent_t),
	    offsetof(port_kevent_t, portkev_node));
	list_create(&portq->portq_get_list, sizeof (port_kevent_t),
	    offsetof(port_kevent_t, portkev_node));
	portq->portq_flags = 0;
	/* remember the pid of the creating process */
	pp->port_pid = curproc->p_pid;

	/* Allocate cache skeleton for PORT_SOURCE_FD events */
	portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP);
	mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Allocate cache skeleton for association of event sources.
	 */
	mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL);
	portq->portq_scache = kmem_zalloc(
	    PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP);

	/*
	 * pre-associate some kernel sources with this port.
	 * The pre-association is required to create port_source_t
	 * structures for object association.
	 * Some sources can not get associated with a port before the first
	 * object association is requested. Another reason to pre_associate
	 * a particular source with a port is because of performance.
	 */

	/* port_ksource_tab[] is terminated by an entry with pks_source == 0 */
	for (pks = port_ksource_tab; pks->pks_source != 0; pks++)
		port_add_ksource_local(pp, pks);
}

/*
 * The port_add_ksource_local() function is being used to associate
 * event sources with every new port.
 * The event sources need to be added to port_ksource_tab[].
852 */ 853 static void 854 port_add_ksource_local(port_t *pp, port_ksource_t *pks) 855 { 856 port_source_t *pse; 857 port_source_t **ps; 858 859 mutex_enter(&pp->port_queue.portq_source_mutex); 860 ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)]; 861 for (pse = *ps; pse != NULL; pse = pse->portsrc_next) { 862 if (pse->portsrc_source == pks->pks_source) 863 break; 864 } 865 866 if (pse == NULL) { 867 /* associate new source with the port */ 868 pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP); 869 pse->portsrc_source = pks->pks_source; 870 pse->portsrc_close = pks->pks_close; 871 pse->portsrc_closearg = pks->pks_closearg; 872 pse->portsrc_cnt = 1; 873 874 pks->pks_portsrc = pse; 875 if (*ps != NULL) 876 pse->portsrc_next = (*ps)->portsrc_next; 877 *ps = pse; 878 } 879 mutex_exit(&pp->port_queue.portq_source_mutex); 880 } 881 882 /* 883 * The port_send() function sends an event of type "source" to a 884 * port. This function is non-blocking. An event can be sent to 885 * a port as long as the number of events per port does not achieve the 886 * maximal allowed number of events. The max. number of events per port is 887 * defined by the resource control process.max-port-events. 888 * This function is used by the port library function port_send() 889 * and port_dispatch(). The port_send(3c) function is part of the 890 * event ports API and submits events of type PORT_SOURCE_USER. The 891 * port_dispatch() function is project private and it is used by library 892 * functions to submit events of other types than PORT_SOURCE_USER 893 * (e.g. PORT_SOURCE_AIO). 
894 */ 895 static int 896 port_send(port_t *pp, int source, int events, void *user) 897 { 898 port_kevent_t *pev; 899 int error; 900 901 error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev); 902 if (error) 903 return (error); 904 905 pev->portkev_object = 0; 906 pev->portkev_events = events; 907 pev->portkev_user = user; 908 pev->portkev_callback = NULL; 909 pev->portkev_arg = NULL; 910 pev->portkev_flags = 0; 911 912 port_send_event(pev); 913 return (0); 914 } 915 916 /* 917 * The port_noshare() function returns 0 if the current event was generated 918 * by the same process. Otherwise is returns a value other than 0 and the 919 * event should not be delivered to the current processe. 920 * The port_noshare() function is normally used by the port_dispatch() 921 * function. The port_dispatch() function is project private and can only be 922 * used within the event port project. 923 * Currently the libaio uses the port_dispatch() function to deliver events 924 * of types PORT_SOURCE_AIO. 925 */ 926 /* ARGSUSED */ 927 static int 928 port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp) 929 { 930 if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid) 931 return (1); 932 return (0); 933 } 934 935 /* 936 * The port_dispatch_event() function is project private and it is used by 937 * libraries involved in the project to deliver events to the port. 938 * port_dispatch will sleep and wait for enough resources to satisfy the 939 * request, if necessary. 940 * The library can specify if the delivered event is shareable with other 941 * processes (see PORT_SYS_NOSHARE flag). 
942 */ 943 static int 944 port_dispatch_event(port_t *pp, int opcode, int source, int events, 945 uintptr_t object, void *user) 946 { 947 port_kevent_t *pev; 948 int error; 949 950 error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev); 951 if (error) 952 return (error); 953 954 pev->portkev_object = object; 955 pev->portkev_events = events; 956 pev->portkev_user = user; 957 pev->portkev_arg = NULL; 958 if (opcode & PORT_SYS_NOSHARE) { 959 pev->portkev_flags = PORT_KEV_NOSHARE; 960 pev->portkev_callback = port_noshare; 961 } else { 962 pev->portkev_flags = 0; 963 pev->portkev_callback = NULL; 964 } 965 966 port_send_event(pev); 967 return (0); 968 } 969 970 971 /* 972 * The port_sendn() function is the kernel implementation of the event 973 * port API function port_sendn(3c). 974 * This function is able to send an event to a list of event ports. 975 */ 976 static int 977 port_sendn(int ports[], int errors[], uint_t nent, int events, void *user, 978 uint_t *nget) 979 { 980 port_kevent_t *pev; 981 int errorcnt = 0; 982 int error = 0; 983 int count; 984 int port; 985 int *plist; 986 int *elist = NULL; 987 file_t *fp; 988 port_t *pp; 989 990 if (nent == 0 || nent > port_max_list) 991 return (EINVAL); 992 993 plist = kmem_alloc(nent * sizeof (int), KM_SLEEP); 994 if (copyin((void *)ports, plist, nent * sizeof (int))) { 995 kmem_free(plist, nent * sizeof (int)); 996 return (EFAULT); 997 } 998 999 /* 1000 * Scan the list for event port file descriptors and send the 1001 * attached user event data embedded in a event of type 1002 * PORT_SOURCE_USER to every event port in the list. 1003 * If a list entry is not a valid event port then the corresponding 1004 * error code will be stored in the errors[] list with the same 1005 * list offset as in the ports[] list. 
1006 */ 1007 1008 for (count = 0; count < nent; count++) { 1009 port = plist[count]; 1010 if ((fp = getf(port)) == NULL) { 1011 elist = port_errorn(elist, nent, EBADF, count); 1012 errorcnt++; 1013 continue; 1014 } 1015 1016 pp = VTOEP(fp->f_vnode); 1017 if (fp->f_vnode->v_type != VPORT) { 1018 releasef(port); 1019 elist = port_errorn(elist, nent, EBADFD, count); 1020 errorcnt++; 1021 continue; 1022 } 1023 1024 error = port_alloc_event_local(pp, PORT_SOURCE_USER, 1025 PORT_ALLOC_DEFAULT, &pev); 1026 if (error) { 1027 releasef(port); 1028 elist = port_errorn(elist, nent, error, count); 1029 errorcnt++; 1030 continue; 1031 } 1032 1033 pev->portkev_object = 0; 1034 pev->portkev_events = events; 1035 pev->portkev_user = user; 1036 pev->portkev_callback = NULL; 1037 pev->portkev_arg = NULL; 1038 pev->portkev_flags = 0; 1039 1040 port_send_event(pev); 1041 releasef(port); 1042 } 1043 if (errorcnt) { 1044 error = EIO; 1045 if (copyout(elist, (void *)errors, nent * sizeof (int))) 1046 error = EFAULT; 1047 kmem_free(elist, nent * sizeof (int)); 1048 } 1049 *nget = nent - errorcnt; 1050 kmem_free(plist, nent * sizeof (int)); 1051 return (error); 1052 } 1053 1054 static int * 1055 port_errorn(int *elist, int nent, int error, int index) 1056 { 1057 if (elist == NULL) 1058 elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP); 1059 elist[index] = error; 1060 return (elist); 1061 } 1062 1063 /* 1064 * port_alert() 1065 * The port_alert() funcion is a high priority event and it is always set 1066 * on top of the queue. It is also delivered as single event. 
1067 * flags: 1068 * - SET :overwrite current alert data 1069 * - UPDATE:set alert data or return EBUSY if alert mode is already set 1070 * 1071 * - set the ALERT flag 1072 * - wakeup all sleeping threads 1073 */ 1074 static int 1075 port_alert(port_t *pp, int flags, int events, void *user) 1076 { 1077 port_queue_t *portq; 1078 portget_t *pgetp; 1079 port_alert_t *pa; 1080 1081 if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID) 1082 return (EINVAL); 1083 1084 portq = &pp->port_queue; 1085 pa = &portq->portq_alert; 1086 mutex_enter(&portq->portq_mutex); 1087 1088 /* check alert conditions */ 1089 if (flags == PORT_ALERT_UPDATE) { 1090 if (portq->portq_flags & PORTQ_ALERT) { 1091 mutex_exit(&portq->portq_mutex); 1092 return (EBUSY); 1093 } 1094 } 1095 1096 /* 1097 * Store alert data in the port to be delivered to threads 1098 * which are using port_get(n) to retrieve events. 1099 */ 1100 1101 portq->portq_flags |= PORTQ_ALERT; 1102 pa->portal_events = events; /* alert info */ 1103 pa->portal_pid = curproc->p_pid; /* process owner */ 1104 pa->portal_object = 0; /* no object */ 1105 pa->portal_user = user; /* user alert data */ 1106 1107 /* alert and deliver alert data to waiting threads */ 1108 pgetp = portq->portq_thread; 1109 if (pgetp == NULL) { 1110 /* no threads waiting for events */ 1111 mutex_exit(&portq->portq_mutex); 1112 return (0); 1113 } 1114 1115 /* 1116 * Set waiting threads in alert mode (PORTGET_ALERT).. 1117 * Every thread waiting for events already allocated a portget_t 1118 * structure to sleep on. 1119 * The port alert arguments are stored in the portget_t structure. 1120 * The PORTGET_ALERT flag is set to indicate the thread to return 1121 * immediately with the alert event. 
1122 */ 1123 do { 1124 if ((pgetp->portget_state & PORTGET_ALERT) == 0) { 1125 pa = &pgetp->portget_alert; 1126 pa->portal_events = events; 1127 pa->portal_object = 0; 1128 pa->portal_user = user; 1129 pgetp->portget_state |= PORTGET_ALERT; 1130 cv_signal(&pgetp->portget_cv); 1131 } 1132 } while ((pgetp = pgetp->portget_next) != portq->portq_thread); 1133 mutex_exit(&portq->portq_mutex); 1134 return (0); 1135 } 1136 1137 /* 1138 * Clear alert state of the port 1139 */ 1140 static void 1141 port_remove_alert(port_queue_t *portq) 1142 { 1143 mutex_enter(&portq->portq_mutex); 1144 portq->portq_flags &= ~PORTQ_ALERT; 1145 mutex_exit(&portq->portq_mutex); 1146 } 1147 1148 /* 1149 * The port_getn() function is used to retrieve events from a port. 1150 * 1151 * The port_getn() function returns immediately if there are enough events 1152 * available in the port to satisfy the request or if the port is in alert 1153 * mode (see port_alert(3c)). 1154 * The timeout argument of port_getn(3c) -which is embedded in the 1155 * port_gettimer_t structure- specifies if the system call should block or if it 1156 * should return immediately depending on the number of events available. 1157 * This function is internally used by port_getn(3c) as well as by 1158 * port_get(3c). 
 *
 * Arguments:
 *	pp	port to retrieve events from
 *	uevp	user buffer the retrieved events are copied out to
 *	max	upper bound on the number of events copied out; with
 *		max == 0 only the number of queued events is stored in *nget
 *	nget	in: number of events which must be queued before the call
 *		is satisfied; out: number of events actually retrieved
 *	pgt	timeout description plus the state (pgt_loop, pgt_rqtp,
 *		pgt_rqtime, pgt_timecheck) which is carried over when the
 *		function is re-entered for the same request
 *
 * Returns 0 or one of EINVAL, EFAULT, EBADFD (port is being closed),
 * EINTR and ETIME.
 */
static int
port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget,
    port_gettimer_t *pgt)
{
	port_queue_t	*portq;
	port_kevent_t	*pev;
	port_kevent_t	*lev;
	int		error = 0;
	uint_t		nmax;
	uint_t		nevents;
	uint_t		eventsz;
	port_event_t	*kevp;
	list_t		*glist;
	uint_t		tnent;
	int		rval;
	int		blocking = -1;	/* -1: timeout not evaluated yet */
	int		timecheck;
	int		flag;
	timespec_t	rqtime;
	timespec_t	*rqtp = NULL;
	portget_t	*pgetp;
	void		*results;
	model_t		model = get_udatamodel();

	flag = pgt->pgt_flags;

	if (*nget > max && max > 0)
		return (EINVAL);

	portq = &pp->port_queue;
	mutex_enter(&portq->portq_mutex);
	if (max == 0) {
		/*
		 * Return number of objects with events.
		 * The port_block() call is required to synchronize this
		 * thread with another possible thread, which could be
		 * retrieving events from the port queue.
		 */
		port_block(portq);
		/*
		 * Check if a second thread is currently retrieving events
		 * and it is using the temporary event queue.
		 */
		if (portq->portq_tnent) {
			/* put remaining events back to the port queue */
			port_push_eventq(portq);
		}
		*nget = portq->portq_nent;
		port_unblock(portq);
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	if (uevp == NULL) {
		mutex_exit(&portq->portq_mutex);
		return (EFAULT);
	}
	if (*nget == 0) {		/* no events required */
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	/* port is being closed ... */
	if (portq->portq_flags & PORTQ_CLOSE) {
		mutex_exit(&portq->portq_mutex);
		return (EBADFD);
	}

	/* return immediately if port in alert mode */
	if (portq->portq_flags & PORTQ_ALERT) {
		error = port_get_alert(&portq->portq_alert, uevp);
		if (error == 0)
			*nget = 1;
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

	/*
	 * From here on this thread is counted in portq_thrcnt; every return
	 * path below has to decrement the counter again.
	 */
	portq->portq_thrcnt++;

	/*
	 * Now check if the completed events satisfy the
	 * "wait" requirements of the current thread:
	 */

	if (pgt->pgt_loop) {
		/*
		 * loop entry of same thread
		 * pgt_loop is set when the current thread returns
		 * prematurely from this function. That could happen
		 * when a port is being shared between processes and
		 * this thread could not find events to return.
		 * It is not allowed to a thread to retrieve non-shareable
		 * events generated in other processes.
		 * PORTQ_WAIT_EVENTS is set when a thread already
		 * checked the current event queue and no new events
		 * are added to the queue.
		 */
		if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) &&
		    (portq->portq_nent >= *nget)) {
			/* some new events arrived ...check them */
			goto portnowait;
		}
		/* reuse the timeout state saved by the previous pass */
		rqtp = pgt->pgt_rqtp;
		timecheck = pgt->pgt_timecheck;
		pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
	} else {
		/* check if enough events are available ... */
		if (portq->portq_nent >= *nget)
			goto portnowait;
		/*
		 * There are not enough events available to satisfy
		 * the request, check timeout value and wait for
		 * incoming events.
		 */
		error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp,
		    &blocking, flag);
		if (error) {
			port_check_return_cond(portq);
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		if (blocking == 0) /* don't block, check fired events */
			goto portnowait;

		if (rqtp != NULL) {
			timespec_t	now;
			/* turn the relative timeout into an absolute time */
			timecheck = timechanged;
			gethrestime(&now);
			timespecadd(rqtp, &now);
		}
	}

	/* enqueue thread in the list of waiting threads */
	pgetp = port_queue_thread(portq, *nget);


	/* Wait here until return conditions met */
	for (;;) {
		if (pgetp->portget_state & PORTGET_ALERT) {
			/* reap alert event and return */
			error = port_get_alert(&pgetp->portget_alert, uevp);
			if (error)
				*nget = 0;
			else
				*nget = 1;
			port_dequeue_thread(&pp->port_queue, pgetp);
			portq->portq_thrcnt--;
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		/*
		 * Check if some other thread is already retrieving
		 * events (portq_getn > 0).
		 */

		if ((portq->portq_getn == 0) &&
		    ((portq)->portq_nent >= *nget) &&
		    (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) ||
		    !((portq)->portq_flags & PORTQ_WAIT_EVENTS)))
			break;

		if (portq->portq_flags & PORTQ_CLOSE) {
			error = EBADFD;
			break;
		}

		rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
		    rqtp, timecheck);

		/* rval == 0 is mapped to EINTR, a negative rval to ETIME */
		if (rval <= 0) {
			error = (rval == 0) ? EINTR : ETIME;
			break;
		}
	}

	/* take thread out of the wait queue */
	port_dequeue_thread(portq, pgetp);

	/*
	 * ETIME with flag == 0 does not return here: it falls through to
	 * portnowait, so whatever is queued is still retrieved and ETIME is
	 * returned together with those events.
	 */
	if (error != 0 && (error == EINTR || error == EBADFD ||
	    (error == ETIME && flag))) {
		/* return without events */
		port_check_return_cond(portq);
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

portnowait:
	/*
	 * Move port event queue to a temporary event queue .
	 * New incoming events will be continue be posted to the event queue
	 * and they will not be considered by the current thread.
	 * The idea is to avoid lock contentions or an often locking/unlocking
	 * of the port queue mutex. The contention and performance degradation
	 * could happen because:
	 * a) incoming events use the port queue mutex to enqueue new events and
	 * b) before the event can be delivered to the application it is
	 *    necessary to notify the event sources about the event delivery.
	 *    Sometimes the event sources can require a long time to return and
	 *    the queue mutex would block incoming events.
	 * During this time incoming events (port_send_event()) do not need
	 * to awake threads waiting for events. Before the current thread
	 * returns it will check the conditions to awake other waiting threads.
	 */
	portq->portq_getn++;	/* number of threads retrieving events */
	port_block(portq);	/* block other threads here */
	nmax = max < portq->portq_nent ? max : portq->portq_nent;

	if (portq->portq_tnent) {
		/*
		 * Move remaining events from previous thread back to the
		 * port event queue.
		 */
		port_push_eventq(portq);
	}
	/* move port event queue to a temporary queue */
	list_move_tail(&portq->portq_get_list, &portq->portq_list);
	glist = &portq->portq_get_list;	/* use temporary event queue */
	tnent = portq->portq_nent;	/* get current number of events */
	portq->portq_nent = 0;		/* no events in the port event queue */
	portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */
	mutex_exit(&portq->portq_mutex);    /* event queue can be reused now */

	if (model == DATAMODEL_NATIVE) {
		eventsz = sizeof (port_event_t);

		if (nmax == 0) {
			kevp = NULL;
		} else {
			/*
			 * Try not to sleep for the full request; if that
			 * fails, cap the request at port_max_list entries
			 * and sleep for the (smaller) buffer.
			 */
			kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
			if (kevp == NULL) {
				if (nmax > pp->port_max_list)
					nmax = pp->port_max_list;
				kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
			}
		}

		results = kevp;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event(&kevp[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;  /* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#ifdef	_SYSCALL32_IMPL
	} else {
		port_event32_t	*kevp32;

		eventsz = sizeof (port_event32_t);

		if (nmax == 0) {
			kevp32 = NULL;
		} else {
			/* same allocation strategy as in the native case */
			kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
			if (kevp32 == NULL) {
				if (nmax > pp->port_max_list)
					nmax = pp->port_max_list;
				kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
			}
		}

		results = kevp32;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event32(&kevp32[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;  /* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#endif	/* _SYSCALL32_IMPL */
	}

	/*
	 * Remember number of remaining events in the temporary event queue.
	 */
	portq->portq_tnent = tnent - nevents;

	/*
	 * Work to do before return :
	 * - push list of remaining events back to the top of the standard
	 *   port queue.
	 * - if this is the last thread calling port_get(n) then wakeup the
	 *   thread waiting on close(2).
	 * - check for a deferred cv_signal from port_send_event() and wakeup
	 *   the sleeping thread.
	 */

	mutex_enter(&portq->portq_mutex);
	port_unblock(portq);
	if (portq->portq_tnent) {
		/*
		 * move remaining events in the temporary event queue back
		 * to the port event queue
		 */
		port_push_eventq(portq);
	}
	portq->portq_getn--;	/* update # of threads retrieving events */
	if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */
		/* Last thread => check close(2) conditions ... */
		if (portq->portq_flags & PORTQ_CLOSE) {
			cv_signal(&portq->portq_closecv);
			mutex_exit(&portq->portq_mutex);
			kmem_free(results, eventsz * nmax);
			/* do not copyout events */
			*nget = 0;
			return (EBADFD);
		}
	} else if (portq->portq_getn == 0) {
		/*
		 * no other threads retrieving events ...
		 * check wakeup conditions of sleeping threads
		 */
		if ((portq->portq_thread != NULL) &&
		    (portq->portq_nent >= portq->portq_nget))
			cv_signal(&portq->portq_thread->portget_cv);
	}

	/*
	 * Check PORTQ_POLLIN here because the current thread set temporarily
	 * the number of events in the queue to zero.
	 */
	if (portq->portq_flags & PORTQ_POLLIN) {
		portq->portq_flags &= ~PORTQ_POLLIN;
		mutex_exit(&portq->portq_mutex);
		pollwakeup(&pp->port_pollhd, POLLIN);
	} else {
		mutex_exit(&portq->portq_mutex);
	}

	/* now copyout list of user event structures to user space */
	if (nevents) {
		if (copyout(results, uevp, nevents * eventsz))
			error = EFAULT;
	}
	kmem_free(results, eventsz * nmax);

	/*
	 * Nothing was retrieved on a first (non-loop) pass of a request
	 * which may block: save the timeout state in pgt so that a
	 * re-entry with pgt_loop set can continue waiting.
	 */
	if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) {
		/* no events retrieved: check loop conditions */
		if (blocking == -1) {
			/* no timeout checked */
			error = port_get_timeout(pgt->pgt_timeout,
			    &pgt->pgt_rqtime, &rqtp, &blocking, flag);
			if (error) {
				*nget = nevents;
				return (error);
			}
			if (rqtp != NULL) {
				timespec_t	now;
				pgt->pgt_timecheck = timechanged;
				gethrestime(&now);
				timespecadd(&pgt->pgt_rqtime, &now);
			}
			pgt->pgt_rqtp = rqtp;
		} else {
			/* timeout already checked -> remember values */
			pgt->pgt_rqtp = rqtp;
			if (rqtp != NULL) {
				pgt->pgt_timecheck = timecheck;
				pgt->pgt_rqtime = *rqtp;
			}
		}
		if (blocking)
			/* timeout remaining */
			pgt->pgt_loop = 1;
	}

	/* set number of user event structures completed */
	*nget = nevents;
	return (error);
}

/*
 * 1. copy kernel event structure to user event structure.
 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
 * 4. Other types of event structures can be delivered back to the port cache
 *    (port_free_event_local()).
 * 5. The event source callback function is the last opportunity for the
 *    event source to update events, to free local resources associated with
 *    the event or to deny the delivery of the event.
1588 */ 1589 static int 1590 port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list) 1591 { 1592 int free_event = 0; 1593 int flags; 1594 int error; 1595 1596 puevp->portev_source = pkevp->portkev_source; 1597 puevp->portev_object = pkevp->portkev_object; 1598 puevp->portev_user = pkevp->portkev_user; 1599 puevp->portev_events = pkevp->portkev_events; 1600 1601 /* remove event from the queue */ 1602 list_remove(list, pkevp); 1603 1604 /* 1605 * Events of type PORT_KEV_WIRED remain allocated by the 1606 * event source. 1607 */ 1608 flags = pkevp->portkev_flags; 1609 if (pkevp->portkev_flags & PORT_KEV_WIRED) 1610 pkevp->portkev_flags &= ~PORT_KEV_DONEQ; 1611 else 1612 free_event = 1; 1613 1614 if (pkevp->portkev_callback) { 1615 error = (*pkevp->portkev_callback)(pkevp->portkev_arg, 1616 &puevp->portev_events, pkevp->portkev_pid, 1617 PORT_CALLBACK_DEFAULT, pkevp); 1618 1619 if (error) { 1620 /* 1621 * Event can not be delivered. 1622 * Caller must reinsert the event into the queue. 1623 */ 1624 pkevp->portkev_flags = flags; 1625 return (error); 1626 } 1627 } 1628 if (free_event) 1629 port_free_event_local(pkevp, 0); 1630 return (0); 1631 } 1632 1633 #ifdef _SYSCALL32_IMPL 1634 /* 1635 * 1. copy kernel event structure to user event structure. 1636 * 2. PORT_KEV_WIRED event structures will be reused by the "source" 1637 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue) 1638 * 4. Other types of event structures can be delivered back to the port cache 1639 * (port_free_event_local()). 1640 * 5. The event source callback function is the last opportunity for the 1641 * event source to update events, to free local resources associated with 1642 * the event or to deny the delivery of the event. 
1643 */ 1644 static int 1645 port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list) 1646 { 1647 int free_event = 0; 1648 int error; 1649 int flags; 1650 1651 puevp->portev_source = pkevp->portkev_source; 1652 puevp->portev_object = (daddr32_t)pkevp->portkev_object; 1653 puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user; 1654 puevp->portev_events = pkevp->portkev_events; 1655 1656 /* remove event from the queue */ 1657 list_remove(list, pkevp); 1658 1659 /* 1660 * Events if type PORT_KEV_WIRED remain allocated by the 1661 * sub-system (source). 1662 */ 1663 1664 flags = pkevp->portkev_flags; 1665 if (pkevp->portkev_flags & PORT_KEV_WIRED) 1666 pkevp->portkev_flags &= ~PORT_KEV_DONEQ; 1667 else 1668 free_event = 1; 1669 1670 if (pkevp->portkev_callback != NULL) { 1671 error = (*pkevp->portkev_callback)(pkevp->portkev_arg, 1672 &puevp->portev_events, pkevp->portkev_pid, 1673 PORT_CALLBACK_DEFAULT, pkevp); 1674 if (error) { 1675 /* 1676 * Event can not be delivered. 1677 * Caller must reinsert the event into the queue. 1678 */ 1679 pkevp->portkev_flags = flags; 1680 return (error); 1681 } 1682 } 1683 if (free_event) 1684 port_free_event_local(pkevp, 0); 1685 return (0); 1686 } 1687 #endif /* _SYSCALL32_IMPL */ 1688 1689 /* 1690 * copyout alert event. 
1691 */ 1692 static int 1693 port_get_alert(port_alert_t *pa, port_event_t *uevp) 1694 { 1695 model_t model = get_udatamodel(); 1696 1697 /* copyout alert event structures to user space */ 1698 if (model == DATAMODEL_NATIVE) { 1699 port_event_t uev; 1700 uev.portev_source = PORT_SOURCE_ALERT; 1701 uev.portev_object = pa->portal_object; 1702 uev.portev_events = pa->portal_events; 1703 uev.portev_user = pa->portal_user; 1704 if (copyout(&uev, uevp, sizeof (port_event_t))) 1705 return (EFAULT); 1706 #ifdef _SYSCALL32_IMPL 1707 } else { 1708 port_event32_t uev32; 1709 uev32.portev_source = PORT_SOURCE_ALERT; 1710 uev32.portev_object = (daddr32_t)pa->portal_object; 1711 uev32.portev_events = pa->portal_events; 1712 uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user; 1713 if (copyout(&uev32, uevp, sizeof (port_event32_t))) 1714 return (EFAULT); 1715 #endif /* _SYSCALL32_IMPL */ 1716 } 1717 return (0); 1718 } 1719 1720 /* 1721 * Check return conditions : 1722 * - pending port close(2) 1723 * - threads waiting for events 1724 */ 1725 static void 1726 port_check_return_cond(port_queue_t *portq) 1727 { 1728 ASSERT(MUTEX_HELD(&portq->portq_mutex)); 1729 portq->portq_thrcnt--; 1730 if (portq->portq_flags & PORTQ_CLOSE) { 1731 if (portq->portq_thrcnt == 0) 1732 cv_signal(&portq->portq_closecv); 1733 else 1734 cv_signal(&portq->portq_thread->portget_cv); 1735 } 1736 } 1737 1738 /* 1739 * The port_get_kevent() function returns 1740 * - the event located at the head of the queue if 'last' pointer is NULL 1741 * - the next event after the event pointed by 'last' 1742 * The caller of this function is responsible for the integrity of the queue 1743 * in use: 1744 * - port_getn() is using a temporary queue protected with port_block(). 1745 * - port_close_events() is working on the global event queue and protects 1746 * the queue with portq->portq_mutex. 
1747 */ 1748 port_kevent_t * 1749 port_get_kevent(list_t *list, port_kevent_t *last) 1750 { 1751 if (last == NULL) 1752 return (list_head(list)); 1753 else 1754 return (list_next(list, last)); 1755 } 1756 1757 /* 1758 * The port_get_timeout() function gets the timeout data from user space 1759 * and converts that info into a corresponding internal representation. 1760 * The kerneldata flag means that the timeout data is already loaded. 1761 */ 1762 static int 1763 port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp, 1764 int *blocking, int kerneldata) 1765 { 1766 model_t model = get_udatamodel(); 1767 1768 *rqtp = NULL; 1769 if (timeout == NULL) { 1770 *blocking = 1; 1771 return (0); 1772 } 1773 1774 if (kerneldata) { 1775 *rqtime = *timeout; 1776 } else { 1777 if (model == DATAMODEL_NATIVE) { 1778 if (copyin(timeout, rqtime, sizeof (*rqtime))) 1779 return (EFAULT); 1780 #ifdef _SYSCALL32_IMPL 1781 } else { 1782 timespec32_t wait_time_32; 1783 if (copyin(timeout, &wait_time_32, 1784 sizeof (wait_time_32))) 1785 return (EFAULT); 1786 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32); 1787 #endif /* _SYSCALL32_IMPL */ 1788 } 1789 } 1790 1791 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) { 1792 *blocking = 0; 1793 return (0); 1794 } 1795 1796 if (rqtime->tv_sec < 0 || 1797 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC) 1798 return (EINVAL); 1799 1800 *rqtp = rqtime; 1801 *blocking = 1; 1802 return (0); 1803 } 1804 1805 /* 1806 * port_queue_thread() 1807 * Threads requiring more events than available will be put in a wait queue. 1808 * There is a "thread wait queue" per port. 1809 * Threads requiring less events get a higher priority than others and they 1810 * will be awoken first. 
1811 */ 1812 static portget_t * 1813 port_queue_thread(port_queue_t *portq, uint_t nget) 1814 { 1815 portget_t *pgetp; 1816 portget_t *ttp; 1817 portget_t *htp; 1818 1819 pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP); 1820 pgetp->portget_nget = nget; 1821 pgetp->portget_pid = curproc->p_pid; 1822 if (portq->portq_thread == NULL) { 1823 /* first waiting thread */ 1824 portq->portq_thread = pgetp; 1825 portq->portq_nget = nget; 1826 pgetp->portget_prev = pgetp; 1827 pgetp->portget_next = pgetp; 1828 return (pgetp); 1829 } 1830 1831 /* 1832 * thread waiting for less events will be set on top of the queue. 1833 */ 1834 ttp = portq->portq_thread; 1835 htp = ttp; 1836 for (;;) { 1837 if (nget <= ttp->portget_nget) 1838 break; 1839 if (htp == ttp->portget_next) 1840 break; /* last event */ 1841 ttp = ttp->portget_next; 1842 } 1843 1844 /* add thread to the queue */ 1845 pgetp->portget_next = ttp; 1846 pgetp->portget_prev = ttp->portget_prev; 1847 ttp->portget_prev->portget_next = pgetp; 1848 ttp->portget_prev = pgetp; 1849 if (portq->portq_thread == ttp) 1850 portq->portq_thread = pgetp; 1851 portq->portq_nget = portq->portq_thread->portget_nget; 1852 return (pgetp); 1853 } 1854 1855 /* 1856 * Take thread out of the queue. 1857 */ 1858 static void 1859 port_dequeue_thread(port_queue_t *portq, portget_t *pgetp) 1860 { 1861 if (pgetp->portget_next == pgetp) { 1862 /* last (single) waiting thread */ 1863 portq->portq_thread = NULL; 1864 portq->portq_nget = 0; 1865 } else { 1866 pgetp->portget_prev->portget_next = pgetp->portget_next; 1867 pgetp->portget_next->portget_prev = pgetp->portget_prev; 1868 if (portq->portq_thread == pgetp) 1869 portq->portq_thread = pgetp->portget_next; 1870 portq->portq_nget = portq->portq_thread->portget_nget; 1871 } 1872 kmem_free(pgetp, sizeof (portget_t)); 1873 } 1874 1875 /* 1876 * Set up event port kstats. 
1877 */ 1878 static void 1879 port_kstat_init() 1880 { 1881 kstat_t *ksp; 1882 uint_t ndata; 1883 1884 ndata = sizeof (port_kstat) / sizeof (kstat_named_t); 1885 ksp = kstat_create("portfs", 0, "Event Ports", "misc", 1886 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL); 1887 if (ksp) { 1888 ksp->ks_data = &port_kstat; 1889 kstat_install(ksp); 1890 } 1891 } 1892