1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/cred.h> 32 #include <sys/modctl.h> 33 #include <sys/vfs.h> 34 #include <sys/vfs_opreg.h> 35 #include <sys/sysmacros.h> 36 #include <sys/cmn_err.h> 37 #include <sys/stat.h> 38 #include <sys/errno.h> 39 #include <sys/kmem.h> 40 #include <sys/file.h> 41 #include <sys/kstat.h> 42 #include <sys/port_impl.h> 43 #include <sys/task.h> 44 #include <sys/project.h> 45 46 /* 47 * Event Ports can be shared across threads or across processes. 48 * Every thread/process can use an own event port or a group of them 49 * can use a single port. A major request was also to get the ability 50 * to submit user-defined events to a port. The idea of the 51 * user-defined events is to use the event ports for communication between 52 * threads/processes (like message queues). User defined-events are queued 53 * in a port with the same priority as other event types. 54 * 55 * Events are delivered only once. 
The thread/process which is waiting 56 * for events with the "highest priority" (priority here is related to the 57 * internal strategy to wakeup waiting threads) will retrieve the event, 58 * all other threads/processes will not be notified. There is also 59 * the requirement to have events which should be submitted immediately 60 * to all "waiting" threads. That is the main task of the alert event. 61 * The alert event is submitted by the application to a port. The port 62 * changes from a standard mode to the alert mode. Now all waiting threads 63 * will be awaken immediately and they will return with the alert event. 64 * Threads trying to retrieve events from a port in alert mode will 65 * return immediately with the alert event. 66 * 67 * 68 * An event port is like a kernel queue, which accept events submitted from 69 * user level as well as events submitted from kernel sub-systems. Sub-systems 70 * able to submit events to a port are the so-called "event sources". 71 * Current event sources: 72 * PORT_SOURCE_AIO : events submitted per transaction completion from 73 * POSIX-I/O framework. 74 * PORT_SOURCE_TIMER : events submitted when a timer fires 75 * (see timer_create(3RT)). 76 * PORT_SOURCE_FD : events submitted per file descriptor (see poll(2)). 77 * PORT_SOURCE_ALERT : events submitted from user. This is not really a 78 * single event, this is actually a port mode 79 * (see port_alert(3c)). 80 * PORT_SOURCE_USER : events submitted by applications with 81 * port_send(3c) or port_sendn(3c). 82 * PORT_SOURCE_FILE : events submitted per file being watched for file 83 * change events (see port_create(3c). 84 * 85 * There is a user API implemented in the libc library as well as a 86 * kernel API implemented in port_subr.c in genunix. 87 * The available user API functions are: 88 * port_create() : create a port as a file descriptor of portfs file system 89 * The standard close(2) function closes a port. 
 *	port_associate() : associate a file descriptor with a port to be able to
 *			   retrieve events from that file descriptor.
 *	port_dissociate(): remove the association of a file descriptor with a port.
 *	port_alert()	 : set/unset a port in alert mode
 *	port_send()	 : send an event of type PORT_SOURCE_USER to a port
 *	port_sendn()	 : send an event of type PORT_SOURCE_USER to a list of ports
 *	port_get()	 : retrieve a single event from a port
 *	port_getn()	 : retrieve a list of events from a port
 *
 * The available kernel API functions are:
 *	port_alloc_event(): allocate an event slot/structure of/from a port
 *	port_init_event()    : set event data in the event structure
 *	port_send_event()    : send event to a port
 *	port_free_event()    : deliver allocated slot/structure back to a port
 *	port_associate_ksource(): associate a kernel event source with a port
 *	port_dissociate_ksource(): dissociate a kernel event source from a port
 *
 * The libc implementation consists of small functions which pass the
 * arguments to the kernel using the "portfs" system call. It means, all the
 * synchronisation work is being done in the kernel. The "portfs" system
 * call loads the portfs file system into the kernel.
 *
 * PORT CREATION
 * The first function to be used is port_create() which internally creates
 * a vnode and a portfs node. The portfs node is represented by the port_t
 * structure, which again includes all the data necessary to control a port.
 * port_create() returns a file descriptor, which needs to be used in almost
 * all other event port functions.
 * The maximum number of ports per system is controlled by the resource
 * control: project.max-port-ids.
 *
 * EVENT GENERATION
 * The second step is the triggering of events, which could be sent to a port.
123 * Every event source implements an own method to generate events for a port: 124 * PORT_SOURCE_AIO: 125 * The sigevent structure of the standard POSIX-IO functions 126 * was extended by an additional notification type. 127 * Standard notification types: 128 * SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD 129 * Event ports introduced now SIGEV_PORT. 130 * The notification type SIGEV_PORT specifies that a structure 131 * of type port_notify_t has to be attached to the sigev_value. 132 * The port_notify_t structure contains the event port file 133 * descriptor and a user-defined pointer. 134 * Internally the AIO implementation will use the kernel API 135 * functions to allocate an event port slot per transaction (aiocb) 136 * and sent the event to the port as soon as the transaction completes. 137 * All the events submitted per transaction are of type 138 * PORT_SOURCE_AIO. 139 * PORT_SOURCE_TIMER: 140 * The timer_create() function uses the same method as the 141 * PORT_SOURCE_AIO event source. It also uses the sigevent structure 142 * to deliver the port information. 143 * Internally the timer code will allocate a single event slot/struct 144 * per timer and it will send the timer event as soon as the timer 145 * fires. If the timer-fired event is not delivered to the application 146 * before the next period elapsed, then an overrun counter will be 147 * incremented. The timer event source uses a callback function to 148 * detect the delivery of the event to the application. At that time 149 * the timer callback function will update the event overrun counter. 150 * PORT_SOURCE_FD: 151 * This event source uses the port_associate() function to allocate 152 * an event slot/struct from a port. The application defines in the 153 * events argument of port_associate() the type of events which it is 154 * interested on. 
155 * The internal pollwakeup() function is used by all the file 156 * systems --which are supporting the VOP_POLL() interface- to notify 157 * the upper layer (poll(2), devpoll(7d) and now event ports) about 158 * the event triggered (see valid events in poll(2)). 159 * The pollwakeup() function forwards the event to the layer registered 160 * to receive the current event. 161 * The port_dissociate() function can be used to free the allocated 162 * event slot from the port. Anyway, file descriptors deliver events 163 * only one time and remain deactivated until the application 164 * reactivates the association of a file descriptor with port_associate(). 165 * If an associated file descriptor is closed then the file descriptor 166 * will be dissociated automatically from the port. 167 * 168 * PORT_SOURCE_ALERT: 169 * This event type is generated when the port was previously set in 170 * alert mode using the port_alert() function. 171 * A single alert event is delivered to every thread which tries to 172 * retrieve events from a port. 173 * PORT_SOURCE_USER: 174 * This type of event is generated from user level using the port_send() 175 * function to send a user event to a port or the port_sendn() function 176 * to send an event to a list of ports. 177 * PORT_SOURCE_FILE: 178 * This event source uses the port_associate() interface to register 179 * a file to be monitored for changes. The file name that needs to be 180 * monitored is specified in the file_obj_t structure, a pointer to which 181 * is passed as an argument. The event types to be monitored are specified 182 * in the events argument. 183 * A file events monitor is represented internal per port per object 184 * address(the file_obj_t pointer). Which means there can be multiple 185 * watches registered on the same file using different file_obj_t 186 * structure pointer. With the help of the FEM(File Event Monitoring) 187 * hooks, the file's vnode ops are intercepted and relevant events 188 * delivered. 
The port_dissociate() function is used to de-register a
 *	file events monitor on a file. When the specified file is
 *	removed/renamed, the file events watch/monitor is automatically
 *	removed.
 *
 * EVENT DELIVERY / RETRIEVING EVENTS
 * Events remain in the port queue until:
 * - the application uses port_get() or port_getn() to retrieve events,
 * - the event source cancels the event,
 * - the event port is closed or
 * - the process exits.
 * The maximal number of events in a port queue is the maximal number
 * of event slots/structures which can be allocated by event sources.
 * The allocation of event slots/structures is controlled by the resource
 * control: process.max-port-events.
 * The port_get() function retrieves a single event and the port_getn()
 * function retrieves a list of events.
 * Events are classified as shareable and non-shareable events across processes.
 * Non-shareable events are invisible for the port_get(n)() functions of
 * processes other than the owner of the event.
 *    Shareable event types are:
 *    PORT_SOURCE_USER events
 *	This type of event is unconditionally shareable and without
 *	limitations. If the parent process sends a user event and closes
 *	the port afterwards, the event remains in the port and the child
 *	process will still be able to retrieve the user event.
 *    PORT_SOURCE_ALERT events
 *	This type of event is shareable between processes.
 *	Limitation:	The alert mode of the port is removed if the owner
 *			(process which set the port in alert mode) of the
 *			alert event closes the port.
 *    PORT_SOURCE_FD events
 *	This type of event is conditionally shareable between processes.
 *	After fork(2) all forked file descriptors are shareable between
 *	the processes. The child process is allowed to retrieve events
 *	from the associated file descriptors and it can also re-associate
 *	the fd with the port.
225 * Limitations: The child process is not allowed to dissociate 226 * the file descriptor from the port. Only the 227 * owner (process) of the association is allowed to 228 * dissociate the file descriptor from the port. 229 * If the owner of the association closes the port 230 * the association will be removed. 231 * PORT_SOURCE_AIO events 232 * This type of event is not shareable between processes. 233 * PORT_SOURCE_TIMER events 234 * This type of event is not shareable between processes. 235 * PORT_SOURCE_FILE events 236 * This type of event is not shareable between processes. 237 * 238 * FORK BEHAVIOUR 239 * On fork(2) the child process inherits all opened file descriptors from 240 * the parent process. This is also valid for port file descriptors. 241 * Associated file descriptors with a port maintain the association across the 242 * fork(2). It means, the child process gets full access to the port and 243 * it can retrieve events from all common associated file descriptors. 244 * Events of file descriptors created and associated with a port after the 245 * fork(2) are non-shareable and can only be retrieved by the same process. 246 * 247 * If the parent or the child process closes an exported port (using fork(2) 248 * or I_SENDFD) all the file descriptors associated with the port by the 249 * process will be dissociated from the port. Events of dissociated file 250 * descriptors as well as all non-shareable events will be discarded. 251 * The other process can continue working with the port as usual. 252 * 253 * CLOSING A PORT 254 * close(2) has to be used to close a port. See FORK BEHAVIOUR for details. 255 * 256 * PORT EVENT STRUCTURES 257 * The global control structure of the event ports framework is port_control_t. 258 * port_control_t keeps track of the number of created ports in the system. 259 * The cache of the port event structures is also located in port_control_t. 260 * 261 * On port_create() the vnode and the portfs node is also created. 
262 * The portfs node is represented by the port_t structure. 263 * The port_t structure manages all port specific tasks: 264 * - management of resource control values 265 * - port VOP_POLL interface 266 * - creation time 267 * - uid and gid of the port 268 * 269 * The port_t structure contains the port_queue_t structure. 270 * The port_queue_t structure contains all the data necessary for the 271 * queue management: 272 * - locking 273 * - condition variables 274 * - event counters 275 * - submitted events (represented by port_kevent_t structures) 276 * - threads waiting for event delivery (check portget_t structure) 277 * - PORT_SOURCE_FD cache (managed by the port_fdcache_t structure) 278 * - event source management (managed by the port_source_t structure) 279 * - alert mode management (check port_alert_t structure) 280 * 281 * EVENT MANAGEMENT 282 * The event port file system creates a kmem_cache for internal allocation of 283 * event port structures. 284 * 285 * 1. Event source association with a port: 286 * The first step to do for event sources is to get associated with a port 287 * using the port_associate_ksource() function or adding an entry to the 288 * port_ksource_tab[]. An event source can get dissociated from a port 289 * using the port_dissociate_ksource() function. An entry in the 290 * port_ksource_tab[] implies that the source will be associated 291 * automatically with every new created port. 292 * The event source can deliver a callback function, which is used by the 293 * port to notify the event source about close(2). The idea is that 294 * in such a case the event source should free all allocated resources 295 * and it must return to the port all allocated slots/structures. 296 * The port_close() function will wait until all allocated event 297 * structures/slots are returned to the port. 
 *    The callback function is not necessary when the event source does not
 *    maintain local resources, a second condition is that the event source
 *    can guarantee that allocated event slots will be returned without
 *    delay to the port (it will not block and sleep somewhere).
 *
 * 2. Reservation of an event slot / event structure
 *    The event port reliability is based on the reservation of an event "slot"
 *    (allocation of an event structure) by the event source as part of the
 *    application call. If the maximal number of event slots is exhausted then
 *    the event source can return a corresponding error code to the application.
 *
 *    The port_alloc_event() function has to be used by event sources to
 *    allocate an event slot (reserve an event structure). The port_alloc_event()
 *    does not block and it will return a 0 value on success or an error code
 *    if it fails.
 *    An argument of port_alloc_event() is a flag which determines the behavior
 *    of the event after it was delivered to the application:
 *    PORT_ALLOC_DEFAULT : event slot becomes free after delivery to the
 *			   application.
 *    PORT_ALLOC_PRIVATE : event slot remains under the control of the event
 *			   source. This kind of slots can not be used for
 *			   event delivery and should only be used internally
 *			   by the event source.
 *    PORT_KEV_CACHED    : event slot remains under the control of an event
 *			   port cache. It does not become free after delivery
 *			   to the application.
 *    PORT_ALLOC_SCACHED : event slot remains under the control of the event
 *			   source. The event source takes the control over
 *			   the slot after the event is delivered to the
 *			   application.
 *
 * 3. Delivery of events to the event port
 *    Earlier allocated event structure/slot has to be used to deliver
 *    event data to the port. Event source has to use the function
 *    port_send_event().
The single argument is a pointer to the previously
 *    reserved event structure/slot.
 *    The portkev_events field of the port_kevent_t structure can be updated/set
 *    in two ways:
 *    1. using the port_set_event() function, or
 *    2. updating the portkev_events field out of the callback function:
 *    The event source can deliver a callback function to the port as an
 *    argument of port_init_event().
 *    One of the arguments of the callback function is a pointer to the
 *    events field, which will be delivered to the application.
 *    (see Delivery of events to the application).
 *    Event structures/slots can be delivered to the event port only one time,
 *    they remain blocked until the data is delivered to the application and the
 *    slot becomes free or it is delivered back to the event source
 *    (PORT_ALLOC_SCACHED). The activation of the callback function mentioned above
 *    is at the same time the indicator for the event source that the event
 *    structure/slot is free for reuse.
 *
 * 4. Delivery of events to the application
 *    The event structures/slots delivered by event sources remain in the
 *    port queue until they are retrieved by the application or the port
 *    is closed (exit(2) also closes all opened file descriptors).
 *    The application uses port_get() or port_getn() to retrieve events from
 *    a port. port_get() retrieves a single event structure/slot and port_getn()
 *    retrieves a list of event structures/slots.
 *    Both functions are able to poll for events and return immediately or they
 *    can specify a timeout value.
 *    Before the events are delivered to the application they are moved to a
 *    second temporary internal queue. The idea is to avoid lock collisions or
 *    contentions of the global queue lock.
 *    The global queue lock is used every time when an event source delivers
 *    new events to the port.
364 * The port_get() and port_getn() functions 365 * a) retrieve single events from the temporary queue, 366 * b) prepare the data to be passed to the application memory, 367 * c) activate the callback function of the event sources: 368 * - to get the latest event data, 369 * - the event source can free all allocated resources associated with the 370 * current event, 371 * - the event source can re-use the current event slot/structure 372 * - the event source can deny the delivery of the event to the application 373 * (e.g. because of the wrong process). 374 * d) put the event back to the temporary queue if the event delivery was denied 375 * e) repeat a) until d) as long as there are events in the queue and 376 * there is enough user space available. 377 * 378 * The loop described above could block for a very long time the global mutex, 379 * to avoid that a second mutex was introduced to synchronized concurrent 380 * threads accessing the temporary queue. 381 */ 382 383 static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, 384 uintptr_t); 385 386 static struct sysent port_sysent = { 387 6, 388 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 389 (int (*)())portfs, 390 }; 391 392 static struct modlsys modlsys = { 393 &mod_syscallops, "event ports", &port_sysent 394 }; 395 396 #ifdef _SYSCALL32_IMPL 397 398 static int64_t 399 portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4, 400 uint32_t arg5, uint32_t arg6); 401 402 static struct sysent port_sysent32 = { 403 6, 404 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 405 (int (*)())portfs32, 406 }; 407 408 static struct modlsys modlsys32 = { 409 &mod_syscallops32, 410 "32-bit event ports syscalls", 411 &port_sysent32 412 }; 413 #endif /* _SYSCALL32_IMPL */ 414 415 static struct modlinkage modlinkage = { 416 MODREV_1, 417 &modlsys, 418 #ifdef _SYSCALL32_IMPL 419 &modlsys32, 420 #endif 421 NULL 422 }; 423 424 port_kstat_t port_kstat = { 425 { "ports", KSTAT_DATA_UINT32 } 426 }; 427 428 dev_t portdev; 429 struct 
vnodeops *port_vnodeops; 430 struct vfs port_vfs; 431 432 extern rctl_hndl_t rc_process_portev; 433 extern rctl_hndl_t rc_project_portids; 434 extern void aio_close_port(void *, int, pid_t, int); 435 436 /* 437 * This table contains a list of event sources which need a static 438 * association with a port (every port). 439 * The last NULL entry in the table is required to detect "end of table". 440 */ 441 struct port_ksource port_ksource_tab[] = { 442 {PORT_SOURCE_AIO, aio_close_port, NULL, NULL}, 443 {0, NULL, NULL, NULL} 444 }; 445 446 /* local functions */ 447 static int port_getn(port_t *, port_event_t *, uint_t, uint_t *, 448 port_gettimer_t *); 449 static int port_sendn(int [], int [], uint_t, int, void *, uint_t *); 450 static int port_alert(port_t *, int, int, void *); 451 static int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *); 452 static int port_send(port_t *, int, int, void *); 453 static int port_create(int *); 454 static int port_get_alert(port_alert_t *, port_event_t *); 455 static int port_copy_event(port_event_t *, port_kevent_t *, list_t *); 456 static int *port_errorn(int *, int, int, int); 457 static int port_noshare(void *, int *, pid_t, int, void *); 458 static int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *, 459 int); 460 static void port_init(port_t *); 461 static void port_remove_alert(port_queue_t *); 462 static void port_add_ksource_local(port_t *, port_ksource_t *); 463 static void port_check_return_cond(port_queue_t *); 464 static void port_dequeue_thread(port_queue_t *, portget_t *); 465 static portget_t *port_queue_thread(port_queue_t *, uint_t); 466 static void port_kstat_init(void); 467 468 #ifdef _SYSCALL32_IMPL 469 static int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *); 470 #endif 471 472 int 473 _init(void) 474 { 475 static const fs_operation_def_t port_vfsops_template[] = { 476 NULL, NULL 477 }; 478 extern const fs_operation_def_t port_vnodeops_template[]; 479 vfsops_t 
*port_vfsops; 480 int error; 481 major_t major; 482 483 if ((major = getudev()) == (major_t)-1) 484 return (ENXIO); 485 portdev = makedevice(major, 0); 486 487 /* Create a dummy vfs */ 488 error = vfs_makefsops(port_vfsops_template, &port_vfsops); 489 if (error) { 490 cmn_err(CE_WARN, "port init: bad vfs ops"); 491 return (error); 492 } 493 vfs_setops(&port_vfs, port_vfsops); 494 port_vfs.vfs_flag = VFS_RDONLY; 495 port_vfs.vfs_dev = portdev; 496 vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0); 497 498 error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops); 499 if (error) { 500 vfs_freevfsops(port_vfsops); 501 cmn_err(CE_WARN, "port init: bad vnode ops"); 502 return (error); 503 } 504 505 mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL); 506 port_control.pc_nents = 0; /* number of active ports */ 507 508 /* create kmem_cache for port event structures */ 509 port_control.pc_cache = kmem_cache_create("port_cache", 510 sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 511 512 port_kstat_init(); /* init port kstats */ 513 return (mod_install(&modlinkage)); 514 } 515 516 int 517 _info(struct modinfo *modinfop) 518 { 519 return (mod_info(&modlinkage, modinfop)); 520 } 521 522 /* 523 * System call wrapper for all port related system calls from 32-bit programs. 524 */ 525 #ifdef _SYSCALL32_IMPL 526 static int64_t 527 portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3, 528 uint32_t a4) 529 { 530 int64_t error; 531 532 switch (opcode & PORT_CODE_MASK) { 533 case PORT_GET: 534 error = portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4); 535 break; 536 case PORT_SENDN: 537 error = portfs(opcode, (uint32_t)a0, a1, a2, a3, a4); 538 break; 539 default: 540 error = portfs(opcode, a0, a1, a2, a3, a4); 541 break; 542 } 543 return (error); 544 } 545 #endif /* _SYSCALL32_IMPL */ 546 547 /* 548 * System entry point for port functions. 549 * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE). 
550 * The libc uses PORT_SYS_NOPORT in functions which do not deliver a 551 * port file descriptor as first argument. 552 */ 553 static int64_t 554 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, 555 uintptr_t a4) 556 { 557 rval_t r; 558 port_t *pp; 559 int error = 0; 560 uint_t nget; 561 file_t *fp; 562 port_gettimer_t port_timer; 563 564 r.r_vals = 0; 565 if (opcode & PORT_SYS_NOPORT) { 566 opcode &= PORT_CODE_MASK; 567 if (opcode == PORT_SENDN) { 568 error = port_sendn((int *)a0, (int *)a1, (uint_t)a2, 569 (int)a3, (void *)a4, (uint_t *)&r.r_val1); 570 if (error && (error != EIO)) 571 return ((int64_t)set_errno(error)); 572 return (r.r_vals); 573 } 574 575 if (opcode == PORT_CREATE) { 576 error = port_create(&r.r_val1); 577 if (error) 578 return ((int64_t)set_errno(error)); 579 return (r.r_vals); 580 } 581 } 582 583 /* opcodes using port as first argument (a0) */ 584 585 if ((fp = getf((int)a0)) == NULL) 586 return ((uintptr_t)set_errno(EBADF)); 587 588 if (fp->f_vnode->v_type != VPORT) { 589 releasef((int)a0); 590 return ((uintptr_t)set_errno(EBADFD)); 591 } 592 593 pp = VTOEP(fp->f_vnode); 594 595 switch (opcode & PORT_CODE_MASK) { 596 case PORT_GET: 597 { 598 /* see PORT_GETN description */ 599 struct timespec timeout; 600 601 port_timer.pgt_flags = PORTGET_ONE; 602 port_timer.pgt_loop = 0; 603 port_timer.pgt_rqtp = NULL; 604 if (a4 != NULL) { 605 port_timer.pgt_timeout = &timeout; 606 timeout.tv_sec = (time_t)a2; 607 timeout.tv_nsec = (long)a3; 608 } else { 609 port_timer.pgt_timeout = NULL; 610 } 611 do { 612 nget = 1; 613 error = port_getn(pp, (port_event_t *)a1, 1, 614 (uint_t *)&nget, &port_timer); 615 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 616 break; 617 } 618 case PORT_GETN: 619 { 620 /* 621 * port_getn() can only retrieve own or shareable events from 622 * other processes. The port_getn() function remains in the 623 * kernel until own or shareable events are available or the 624 * timeout elapses. 
625 */ 626 port_timer.pgt_flags = 0; 627 port_timer.pgt_loop = 0; 628 port_timer.pgt_rqtp = NULL; 629 port_timer.pgt_timeout = (struct timespec *)a4; 630 do { 631 nget = a3; 632 error = port_getn(pp, (port_event_t *)a1, (uint_t)a2, 633 (uint_t *)&nget, &port_timer); 634 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 635 r.r_val1 = nget; 636 r.r_val2 = error; 637 releasef((int)a0); 638 if (error && error != ETIME) 639 return ((int64_t)set_errno(error)); 640 return (r.r_vals); 641 } 642 case PORT_ASSOCIATE: 643 { 644 switch ((int)a1) { 645 case PORT_SOURCE_FD: 646 error = port_associate_fd(pp, (int)a1, (uintptr_t)a2, 647 (int)a3, (void *)a4); 648 break; 649 case PORT_SOURCE_FILE: 650 error = port_associate_fop(pp, (int)a1, (uintptr_t)a2, 651 (int)a3, (void *)a4); 652 break; 653 default: 654 error = EINVAL; 655 break; 656 } 657 break; 658 } 659 case PORT_SEND: 660 { 661 /* user-defined events */ 662 error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2); 663 break; 664 } 665 case PORT_DISPATCH: 666 { 667 /* 668 * library events, blocking 669 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ 670 * are currently allowed. 
671 */ 672 if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) { 673 error = EINVAL; 674 break; 675 } 676 error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2, 677 (uintptr_t)a3, (void *)a4); 678 break; 679 } 680 case PORT_DISSOCIATE: 681 { 682 switch ((int)a1) { 683 case PORT_SOURCE_FD: 684 error = port_dissociate_fd(pp, (uintptr_t)a2); 685 break; 686 case PORT_SOURCE_FILE: 687 error = port_dissociate_fop(pp, (uintptr_t)a2); 688 break; 689 default: 690 error = EINVAL; 691 break; 692 } 693 break; 694 } 695 case PORT_ALERT: 696 { 697 if ((int)a2) /* a2 = events */ 698 error = port_alert(pp, (int)a1, (int)a2, (void *)a3); 699 else 700 port_remove_alert(&pp->port_queue); 701 break; 702 } 703 default: 704 error = EINVAL; 705 break; 706 } 707 708 releasef((int)a0); 709 if (error) 710 return ((int64_t)set_errno(error)); 711 return (r.r_vals); 712 } 713 714 /* 715 * System call to create a port. 716 * 717 * The port_create() function creates a vnode of type VPORT per port. 718 * The port control data is associated with the vnode as vnode private data. 719 * The port_create() function returns an event port file descriptor. 720 */ 721 static int 722 port_create(int *fdp) 723 { 724 port_t *pp; 725 vnode_t *vp; 726 struct file *fp; 727 proc_t *p = curproc; 728 729 /* initialize vnode and port private data */ 730 pp = kmem_zalloc(sizeof (port_t), KM_SLEEP); 731 732 pp->port_vnode = vn_alloc(KM_SLEEP); 733 vp = EPTOV(pp); 734 vn_setops(vp, port_vnodeops); 735 vp->v_type = VPORT; 736 vp->v_vfsp = &port_vfs; 737 vp->v_data = (caddr_t)pp; 738 739 mutex_enter(&port_control.pc_mutex); 740 /* 741 * Retrieve the maximal number of event ports allowed per system from 742 * the resource control: project.port-max-ids. 
743 */ 744 mutex_enter(&p->p_lock); 745 if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p, 746 port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) { 747 mutex_exit(&p->p_lock); 748 vn_free(vp); 749 kmem_free(pp, sizeof (port_t)); 750 mutex_exit(&port_control.pc_mutex); 751 return (EAGAIN); 752 } 753 754 /* 755 * Retrieve the maximal number of events allowed per port from 756 * the resource control: process.port-max-events. 757 */ 758 pp->port_max_events = rctl_enforced_value(rc_process_portev, 759 p->p_rctls, p); 760 mutex_exit(&p->p_lock); 761 762 /* allocate a new user file descriptor and a file structure */ 763 if (falloc(vp, 0, &fp, fdp)) { 764 /* 765 * If the file table is full, free allocated resources. 766 */ 767 vn_free(vp); 768 kmem_free(pp, sizeof (port_t)); 769 mutex_exit(&port_control.pc_mutex); 770 return (EMFILE); 771 } 772 773 mutex_exit(&fp->f_tlock); 774 775 pp->port_fd = *fdp; 776 port_control.pc_nents++; 777 p->p_portcnt++; 778 port_kstat.pks_ports.value.ui32++; 779 mutex_exit(&port_control.pc_mutex); 780 781 /* initializes port private data */ 782 port_init(pp); 783 /* set user file pointer */ 784 setf(*fdp, fp); 785 return (0); 786 } 787 788 /* 789 * port_init() initializes event port specific data 790 */ 791 static void 792 port_init(port_t *pp) 793 { 794 port_queue_t *portq; 795 port_ksource_t *pks; 796 797 mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL); 798 portq = &pp->port_queue; 799 mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL); 800 pp->port_flags |= PORT_INIT; 801 802 /* 803 * If it is not enough memory available to satisfy a user 804 * request using a single port_getn() call then port_getn() 805 * will reduce the size of the list to PORT_MAX_LIST. 
806 */ 807 pp->port_max_list = port_max_list; 808 809 /* Set timestamp entries required for fstat(2) requests */ 810 gethrestime(&pp->port_ctime); 811 pp->port_uid = crgetuid(curproc->p_cred); 812 pp->port_gid = crgetgid(curproc->p_cred); 813 814 /* initialize port queue structs */ 815 list_create(&portq->portq_list, sizeof (port_kevent_t), 816 offsetof(port_kevent_t, portkev_node)); 817 list_create(&portq->portq_get_list, sizeof (port_kevent_t), 818 offsetof(port_kevent_t, portkev_node)); 819 portq->portq_flags = 0; 820 pp->port_pid = curproc->p_pid; 821 822 /* Allocate cache skeleton for PORT_SOURCE_FD events */ 823 portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP); 824 mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL); 825 826 /* 827 * Allocate cache skeleton for association of event sources. 828 */ 829 mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL); 830 portq->portq_scache = kmem_zalloc( 831 PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP); 832 833 /* 834 * pre-associate some kernel sources with this port. 835 * The pre-association is required to create port_source_t 836 * structures for object association. 837 * Some sources can not get associated with a port before the first 838 * object association is requested. Another reason to pre_associate 839 * a particular source with a port is because of performance. 840 */ 841 842 for (pks = port_ksource_tab; pks->pks_source != 0; pks++) 843 port_add_ksource_local(pp, pks); 844 } 845 846 /* 847 * The port_add_ksource_local() function is being used to associate 848 * event sources with every new port. 849 * The event sources need to be added to port_ksource_tab[]. 
850 */ 851 static void 852 port_add_ksource_local(port_t *pp, port_ksource_t *pks) 853 { 854 port_source_t *pse; 855 port_source_t **ps; 856 857 mutex_enter(&pp->port_queue.portq_source_mutex); 858 ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)]; 859 for (pse = *ps; pse != NULL; pse = pse->portsrc_next) { 860 if (pse->portsrc_source == pks->pks_source) 861 break; 862 } 863 864 if (pse == NULL) { 865 /* associate new source with the port */ 866 pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP); 867 pse->portsrc_source = pks->pks_source; 868 pse->portsrc_close = pks->pks_close; 869 pse->portsrc_closearg = pks->pks_closearg; 870 pse->portsrc_cnt = 1; 871 872 pks->pks_portsrc = pse; 873 if (*ps != NULL) 874 pse->portsrc_next = (*ps)->portsrc_next; 875 *ps = pse; 876 } 877 mutex_exit(&pp->port_queue.portq_source_mutex); 878 } 879 880 /* 881 * The port_send() function sends an event of type "source" to a 882 * port. This function is non-blocking. An event can be sent to 883 * a port as long as the number of events per port does not achieve the 884 * maximal allowed number of events. The max. number of events per port is 885 * defined by the resource control process.max-port-events. 886 * This function is used by the port library function port_send() 887 * and port_dispatch(). The port_send(3c) function is part of the 888 * event ports API and submits events of type PORT_SOURCE_USER. The 889 * port_dispatch() function is project private and it is used by library 890 * functions to submit events of other types than PORT_SOURCE_USER 891 * (e.g. PORT_SOURCE_AIO). 
892 */ 893 static int 894 port_send(port_t *pp, int source, int events, void *user) 895 { 896 port_kevent_t *pev; 897 int error; 898 899 error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev); 900 if (error) 901 return (error); 902 903 pev->portkev_object = 0; 904 pev->portkev_events = events; 905 pev->portkev_user = user; 906 pev->portkev_callback = NULL; 907 pev->portkev_arg = NULL; 908 pev->portkev_flags = 0; 909 910 port_send_event(pev); 911 return (0); 912 } 913 914 /* 915 * The port_noshare() function returns 0 if the current event was generated 916 * by the same process. Otherwise is returns a value other than 0 and the 917 * event should not be delivered to the current processe. 918 * The port_noshare() function is normally used by the port_dispatch() 919 * function. The port_dispatch() function is project private and can only be 920 * used within the event port project. 921 * Currently the libaio uses the port_dispatch() function to deliver events 922 * of types PORT_SOURCE_AIO. 923 */ 924 /* ARGSUSED */ 925 static int 926 port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp) 927 { 928 if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid) 929 return (1); 930 return (0); 931 } 932 933 /* 934 * The port_dispatch_event() function is project private and it is used by 935 * libraries involved in the project to deliver events to the port. 936 * port_dispatch will sleep and wait for enough resources to satisfy the 937 * request, if necessary. 938 * The library can specify if the delivered event is shareable with other 939 * processes (see PORT_SYS_NOSHARE flag). 
940 */ 941 static int 942 port_dispatch_event(port_t *pp, int opcode, int source, int events, 943 uintptr_t object, void *user) 944 { 945 port_kevent_t *pev; 946 int error; 947 948 error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev); 949 if (error) 950 return (error); 951 952 pev->portkev_object = object; 953 pev->portkev_events = events; 954 pev->portkev_user = user; 955 pev->portkev_arg = NULL; 956 if (opcode & PORT_SYS_NOSHARE) { 957 pev->portkev_flags = PORT_KEV_NOSHARE; 958 pev->portkev_callback = port_noshare; 959 } else { 960 pev->portkev_flags = 0; 961 pev->portkev_callback = NULL; 962 } 963 964 port_send_event(pev); 965 return (0); 966 } 967 968 969 /* 970 * The port_sendn() function is the kernel implementation of the event 971 * port API function port_sendn(3c). 972 * This function is able to send an event to a list of event ports. 973 */ 974 static int 975 port_sendn(int ports[], int errors[], uint_t nent, int events, void *user, 976 uint_t *nget) 977 { 978 port_kevent_t *pev; 979 int errorcnt = 0; 980 int error = 0; 981 int count; 982 int port; 983 int *plist; 984 int *elist = NULL; 985 file_t *fp; 986 port_t *pp; 987 988 if (nent == 0 || nent > port_max_list) 989 return (EINVAL); 990 991 plist = kmem_alloc(nent * sizeof (int), KM_SLEEP); 992 if (copyin((void *)ports, plist, nent * sizeof (int))) { 993 kmem_free(plist, nent * sizeof (int)); 994 return (EFAULT); 995 } 996 997 /* 998 * Scan the list for event port file descriptors and send the 999 * attached user event data embedded in a event of type 1000 * PORT_SOURCE_USER to every event port in the list. 1001 * If a list entry is not a valid event port then the corresponding 1002 * error code will be stored in the errors[] list with the same 1003 * list offset as in the ports[] list. 
1004 */ 1005 1006 for (count = 0; count < nent; count++) { 1007 port = plist[count]; 1008 if ((fp = getf(port)) == NULL) { 1009 elist = port_errorn(elist, nent, EBADF, count); 1010 errorcnt++; 1011 continue; 1012 } 1013 1014 pp = VTOEP(fp->f_vnode); 1015 if (fp->f_vnode->v_type != VPORT) { 1016 releasef(port); 1017 elist = port_errorn(elist, nent, EBADFD, count); 1018 errorcnt++; 1019 continue; 1020 } 1021 1022 error = port_alloc_event_local(pp, PORT_SOURCE_USER, 1023 PORT_ALLOC_DEFAULT, &pev); 1024 if (error) { 1025 releasef(port); 1026 elist = port_errorn(elist, nent, error, count); 1027 errorcnt++; 1028 continue; 1029 } 1030 1031 pev->portkev_object = 0; 1032 pev->portkev_events = events; 1033 pev->portkev_user = user; 1034 pev->portkev_callback = NULL; 1035 pev->portkev_arg = NULL; 1036 pev->portkev_flags = 0; 1037 1038 port_send_event(pev); 1039 releasef(port); 1040 } 1041 if (errorcnt) { 1042 error = EIO; 1043 if (copyout(elist, (void *)errors, nent * sizeof (int))) 1044 error = EFAULT; 1045 kmem_free(elist, nent * sizeof (int)); 1046 } 1047 *nget = nent - errorcnt; 1048 kmem_free(plist, nent * sizeof (int)); 1049 return (error); 1050 } 1051 1052 static int * 1053 port_errorn(int *elist, int nent, int error, int index) 1054 { 1055 if (elist == NULL) 1056 elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP); 1057 elist[index] = error; 1058 return (elist); 1059 } 1060 1061 /* 1062 * port_alert() 1063 * The port_alert() funcion is a high priority event and it is always set 1064 * on top of the queue. It is also delivered as single event. 
1065 * flags: 1066 * - SET :overwrite current alert data 1067 * - UPDATE:set alert data or return EBUSY if alert mode is already set 1068 * 1069 * - set the ALERT flag 1070 * - wakeup all sleeping threads 1071 */ 1072 static int 1073 port_alert(port_t *pp, int flags, int events, void *user) 1074 { 1075 port_queue_t *portq; 1076 portget_t *pgetp; 1077 port_alert_t *pa; 1078 1079 if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID) 1080 return (EINVAL); 1081 1082 portq = &pp->port_queue; 1083 pa = &portq->portq_alert; 1084 mutex_enter(&portq->portq_mutex); 1085 1086 /* check alert conditions */ 1087 if (flags == PORT_ALERT_UPDATE) { 1088 if (portq->portq_flags & PORTQ_ALERT) { 1089 mutex_exit(&portq->portq_mutex); 1090 return (EBUSY); 1091 } 1092 } 1093 1094 /* 1095 * Store alert data in the port to be delivered to threads 1096 * which are using port_get(n) to retrieve events. 1097 */ 1098 1099 portq->portq_flags |= PORTQ_ALERT; 1100 pa->portal_events = events; /* alert info */ 1101 pa->portal_pid = curproc->p_pid; /* process owner */ 1102 pa->portal_object = 0; /* no object */ 1103 pa->portal_user = user; /* user alert data */ 1104 1105 /* alert and deliver alert data to waiting threads */ 1106 pgetp = portq->portq_thread; 1107 if (pgetp == NULL) { 1108 /* no threads waiting for events */ 1109 mutex_exit(&portq->portq_mutex); 1110 return (0); 1111 } 1112 1113 /* 1114 * Set waiting threads in alert mode (PORTGET_ALERT).. 1115 * Every thread waiting for events already allocated a portget_t 1116 * structure to sleep on. 1117 * The port alert arguments are stored in the portget_t structure. 1118 * The PORTGET_ALERT flag is set to indicate the thread to return 1119 * immediately with the alert event. 
1120 */ 1121 do { 1122 if ((pgetp->portget_state & PORTGET_ALERT) == 0) { 1123 pa = &pgetp->portget_alert; 1124 pa->portal_events = events; 1125 pa->portal_object = 0; 1126 pa->portal_user = user; 1127 pgetp->portget_state |= PORTGET_ALERT; 1128 cv_signal(&pgetp->portget_cv); 1129 } 1130 } while ((pgetp = pgetp->portget_next) != portq->portq_thread); 1131 mutex_exit(&portq->portq_mutex); 1132 return (0); 1133 } 1134 1135 /* 1136 * Clear alert state of the port 1137 */ 1138 static void 1139 port_remove_alert(port_queue_t *portq) 1140 { 1141 mutex_enter(&portq->portq_mutex); 1142 portq->portq_flags &= ~PORTQ_ALERT; 1143 mutex_exit(&portq->portq_mutex); 1144 } 1145 1146 /* 1147 * The port_getn() function is used to retrieve events from a port. 1148 * 1149 * The port_getn() function returns immediately if there are enough events 1150 * available in the port to satisfy the request or if the port is in alert 1151 * mode (see port_alert(3c)). 1152 * The timeout argument of port_getn(3c) -which is embedded in the 1153 * port_gettimer_t structure- specifies if the system call should block or if it 1154 * should return immediately depending on the number of events available. 1155 * This function is internally used by port_getn(3c) as well as by 1156 * port_get(3c). 
1157 */ 1158 static int 1159 port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget, 1160 port_gettimer_t *pgt) 1161 { 1162 port_queue_t *portq; 1163 port_kevent_t *pev; 1164 port_kevent_t *lev; 1165 int error = 0; 1166 uint_t nmax; 1167 uint_t nevents; 1168 uint_t eventsz; 1169 port_event_t *kevp; 1170 list_t *glist; 1171 uint_t tnent; 1172 int rval; 1173 int blocking = -1; 1174 int timecheck; 1175 int flag; 1176 timespec_t rqtime; 1177 timespec_t *rqtp = NULL; 1178 portget_t *pgetp; 1179 void *results; 1180 model_t model = get_udatamodel(); 1181 1182 flag = pgt->pgt_flags; 1183 1184 if (*nget > max && max > 0) 1185 return (EINVAL); 1186 1187 portq = &pp->port_queue; 1188 mutex_enter(&portq->portq_mutex); 1189 if (max == 0) { 1190 /* 1191 * Return number of objects with events. 1192 * The port_block() call is required to synchronize this 1193 * thread with another possible thread, which could be 1194 * retrieving events from the port queue. 1195 */ 1196 port_block(portq); 1197 /* 1198 * Check if a second thread is currently retrieving events 1199 * and it is using the temporary event queue. 1200 */ 1201 if (portq->portq_tnent) { 1202 /* put remaining events back to the port queue */ 1203 port_push_eventq(portq); 1204 } 1205 *nget = portq->portq_nent; 1206 port_unblock(portq); 1207 mutex_exit(&portq->portq_mutex); 1208 return (0); 1209 } 1210 1211 if (uevp == NULL) { 1212 mutex_exit(&portq->portq_mutex); 1213 return (EFAULT); 1214 } 1215 if (*nget == 0) { /* no events required */ 1216 mutex_exit(&portq->portq_mutex); 1217 return (0); 1218 } 1219 1220 /* port is being closed ... 
*/ 1221 if (portq->portq_flags & PORTQ_CLOSE) { 1222 mutex_exit(&portq->portq_mutex); 1223 return (EBADFD); 1224 } 1225 1226 /* return immediately if port in alert mode */ 1227 if (portq->portq_flags & PORTQ_ALERT) { 1228 error = port_get_alert(&portq->portq_alert, uevp); 1229 if (error == 0) 1230 *nget = 1; 1231 mutex_exit(&portq->portq_mutex); 1232 return (error); 1233 } 1234 1235 portq->portq_thrcnt++; 1236 1237 /* 1238 * Now check if the completed events satisfy the 1239 * "wait" requirements of the current thread: 1240 */ 1241 1242 if (pgt->pgt_loop) { 1243 /* 1244 * loop entry of same thread 1245 * pgt_loop is set when the current thread returns 1246 * prematurely from this function. That could happen 1247 * when a port is being shared between processes and 1248 * this thread could not find events to return. 1249 * It is not allowed to a thread to retrieve non-shareable 1250 * events generated in other processes. 1251 * PORTQ_WAIT_EVENTS is set when a thread already 1252 * checked the current event queue and no new events 1253 * are added to the queue. 1254 */ 1255 if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) && 1256 (portq->portq_nent >= *nget)) { 1257 /* some new events arrived ...check them */ 1258 goto portnowait; 1259 } 1260 rqtp = pgt->pgt_rqtp; 1261 timecheck = pgt->pgt_timecheck; 1262 pgt->pgt_flags |= PORTGET_WAIT_EVENTS; 1263 } else { 1264 /* check if enough events are available ... */ 1265 if (portq->portq_nent >= *nget) 1266 goto portnowait; 1267 /* 1268 * There are not enough events available to satisfy 1269 * the request, check timeout value and wait for 1270 * incoming events. 
1271 */ 1272 error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp, 1273 &blocking, flag); 1274 if (error) { 1275 port_check_return_cond(portq); 1276 mutex_exit(&portq->portq_mutex); 1277 return (error); 1278 } 1279 1280 if (blocking == 0) /* don't block, check fired events */ 1281 goto portnowait; 1282 1283 if (rqtp != NULL) { 1284 timespec_t now; 1285 timecheck = timechanged; 1286 gethrestime(&now); 1287 timespecadd(rqtp, &now); 1288 } 1289 } 1290 1291 /* enqueue thread in the list of waiting threads */ 1292 pgetp = port_queue_thread(portq, *nget); 1293 1294 1295 /* Wait here until return conditions met */ 1296 for (;;) { 1297 if (pgetp->portget_state & PORTGET_ALERT) { 1298 /* reap alert event and return */ 1299 error = port_get_alert(&pgetp->portget_alert, uevp); 1300 if (error) 1301 *nget = 0; 1302 else 1303 *nget = 1; 1304 port_dequeue_thread(&pp->port_queue, pgetp); 1305 portq->portq_thrcnt--; 1306 mutex_exit(&portq->portq_mutex); 1307 return (error); 1308 } 1309 1310 /* 1311 * Check if some other thread is already retrieving 1312 * events (portq_getn > 0). 1313 */ 1314 1315 if ((portq->portq_getn == 0) && 1316 ((portq)->portq_nent >= *nget) && 1317 (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) || 1318 !((portq)->portq_flags & PORTQ_WAIT_EVENTS))) 1319 break; 1320 1321 if (portq->portq_flags & PORTQ_CLOSE) { 1322 error = EBADFD; 1323 break; 1324 } 1325 1326 rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex, 1327 rqtp, timecheck); 1328 1329 if (rval <= 0) { 1330 error = (rval == 0) ? EINTR : ETIME; 1331 break; 1332 } 1333 } 1334 1335 /* take thread out of the wait queue */ 1336 port_dequeue_thread(portq, pgetp); 1337 1338 if (error != 0 && (error == EINTR || error == EBADFD || 1339 (error == ETIME && flag))) { 1340 /* return without events */ 1341 port_check_return_cond(portq); 1342 mutex_exit(&portq->portq_mutex); 1343 return (error); 1344 } 1345 1346 portnowait: 1347 /* 1348 * Move port event queue to a temporary event queue . 
1349 * New incoming events will be continue be posted to the event queue 1350 * and they will not be considered by the current thread. 1351 * The idea is to avoid lock contentions or an often locking/unlocking 1352 * of the port queue mutex. The contention and performance degradation 1353 * could happen because: 1354 * a) incoming events use the port queue mutex to enqueue new events and 1355 * b) before the event can be delivered to the application it is 1356 * necessary to notify the event sources about the event delivery. 1357 * Sometimes the event sources can require a long time to return and 1358 * the queue mutex would block incoming events. 1359 * During this time incoming events (port_send_event()) do not need 1360 * to awake threads waiting for events. Before the current thread 1361 * returns it will check the conditions to awake other waiting threads. 1362 */ 1363 portq->portq_getn++; /* number of threads retrieving events */ 1364 port_block(portq); /* block other threads here */ 1365 nmax = max < portq->portq_nent ? max : portq->portq_nent; 1366 1367 if (portq->portq_tnent) { 1368 /* 1369 * Move remaining events from previous thread back to the 1370 * port event queue. 
1371 */ 1372 port_push_eventq(portq); 1373 } 1374 /* move port event queue to a temporary queue */ 1375 list_move_tail(&portq->portq_get_list, &portq->portq_list); 1376 glist = &portq->portq_get_list; /* use temporary event queue */ 1377 tnent = portq->portq_nent; /* get current number of events */ 1378 portq->portq_nent = 0; /* no events in the port event queue */ 1379 portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */ 1380 mutex_exit(&portq->portq_mutex); /* event queue can be reused now */ 1381 1382 if (model == DATAMODEL_NATIVE) { 1383 eventsz = sizeof (port_event_t); 1384 kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP); 1385 if (kevp == NULL) { 1386 if (nmax > pp->port_max_list) 1387 nmax = pp->port_max_list; 1388 kevp = kmem_alloc(eventsz * nmax, KM_SLEEP); 1389 } 1390 results = kevp; 1391 lev = NULL; /* start with first event in the queue */ 1392 for (nevents = 0; nevents < nmax; ) { 1393 pev = port_get_kevent(glist, lev); 1394 if (pev == NULL) /* no more events available */ 1395 break; 1396 if (pev->portkev_flags & PORT_KEV_FREE) { 1397 /* Just discard event */ 1398 list_remove(glist, pev); 1399 pev->portkev_flags &= ~(PORT_CLEANUP_DONE); 1400 if (PORT_FREE_EVENT(pev)) 1401 port_free_event_local(pev, 0); 1402 tnent--; 1403 continue; 1404 } 1405 1406 /* move event data to copyout list */ 1407 if (port_copy_event(&kevp[nevents], pev, glist)) { 1408 /* 1409 * Event can not be delivered to the 1410 * current process. 
1411 */ 1412 if (lev != NULL) 1413 list_insert_after(glist, lev, pev); 1414 else 1415 list_insert_head(glist, pev); 1416 lev = pev; /* last checked event */ 1417 } else { 1418 nevents++; /* # of events ready */ 1419 } 1420 } 1421 #ifdef _SYSCALL32_IMPL 1422 } else { 1423 port_event32_t *kevp32; 1424 1425 eventsz = sizeof (port_event32_t); 1426 kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP); 1427 if (kevp32 == NULL) { 1428 if (nmax > pp->port_max_list) 1429 nmax = pp->port_max_list; 1430 kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP); 1431 } 1432 results = kevp32; 1433 lev = NULL; /* start with first event in the queue */ 1434 for (nevents = 0; nevents < nmax; ) { 1435 pev = port_get_kevent(glist, lev); 1436 if (pev == NULL) /* no more events available */ 1437 break; 1438 if (pev->portkev_flags & PORT_KEV_FREE) { 1439 /* Just discard event */ 1440 list_remove(glist, pev); 1441 pev->portkev_flags &= ~(PORT_CLEANUP_DONE); 1442 if (PORT_FREE_EVENT(pev)) 1443 port_free_event_local(pev, 0); 1444 tnent--; 1445 continue; 1446 } 1447 1448 /* move event data to copyout list */ 1449 if (port_copy_event32(&kevp32[nevents], pev, glist)) { 1450 /* 1451 * Event can not be delivered to the 1452 * current process. 1453 */ 1454 if (lev != NULL) 1455 list_insert_after(glist, lev, pev); 1456 else 1457 list_insert_head(glist, pev); 1458 lev = pev; /* last checked event */ 1459 } else { 1460 nevents++; /* # of events ready */ 1461 } 1462 } 1463 #endif /* _SYSCALL32_IMPL */ 1464 } 1465 1466 /* 1467 * Remember number of remaining events in the temporary event queue. 1468 */ 1469 portq->portq_tnent = tnent - nevents; 1470 1471 /* 1472 * Work to do before return : 1473 * - push list of remaining events back to the top of the standard 1474 * port queue. 1475 * - if this is the last thread calling port_get(n) then wakeup the 1476 * thread waiting on close(2). 1477 * - check for a deferred cv_signal from port_send_event() and wakeup 1478 * the sleeping thread. 
1479 */ 1480 1481 mutex_enter(&portq->portq_mutex); 1482 port_unblock(portq); 1483 if (portq->portq_tnent) { 1484 /* 1485 * move remaining events in the temporary event queue back 1486 * to the port event queue 1487 */ 1488 port_push_eventq(portq); 1489 } 1490 portq->portq_getn--; /* update # of threads retrieving events */ 1491 if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */ 1492 /* Last thread => check close(2) conditions ... */ 1493 if (portq->portq_flags & PORTQ_CLOSE) { 1494 cv_signal(&portq->portq_closecv); 1495 mutex_exit(&portq->portq_mutex); 1496 kmem_free(results, eventsz * nmax); 1497 /* do not copyout events */ 1498 *nget = 0; 1499 return (EBADFD); 1500 } 1501 } else if (portq->portq_getn == 0) { 1502 /* 1503 * no other threads retrieving events ... 1504 * check wakeup conditions of sleeping threads 1505 */ 1506 if ((portq->portq_thread != NULL) && 1507 (portq->portq_nent >= portq->portq_nget)) 1508 cv_signal(&portq->portq_thread->portget_cv); 1509 } 1510 1511 /* 1512 * Check PORTQ_POLLIN here because the current thread set temporarily 1513 * the number of events in the queue to zero. 
1514 */ 1515 if (portq->portq_flags & PORTQ_POLLIN) { 1516 portq->portq_flags &= ~PORTQ_POLLIN; 1517 mutex_exit(&portq->portq_mutex); 1518 pollwakeup(&pp->port_pollhd, POLLIN); 1519 } else { 1520 mutex_exit(&portq->portq_mutex); 1521 } 1522 1523 /* now copyout list of user event structures to user space */ 1524 if (nevents) { 1525 if (copyout(results, uevp, nevents * eventsz)) 1526 error = EFAULT; 1527 } 1528 kmem_free(results, eventsz * nmax); 1529 1530 if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) { 1531 /* no events retrieved: check loop conditions */ 1532 if (blocking == -1) { 1533 /* no timeout checked */ 1534 error = port_get_timeout(pgt->pgt_timeout, 1535 &pgt->pgt_rqtime, &rqtp, &blocking, flag); 1536 if (error) { 1537 *nget = nevents; 1538 return (error); 1539 } 1540 if (rqtp != NULL) { 1541 timespec_t now; 1542 pgt->pgt_timecheck = timechanged; 1543 gethrestime(&now); 1544 timespecadd(&pgt->pgt_rqtime, &now); 1545 } 1546 pgt->pgt_rqtp = rqtp; 1547 } else { 1548 /* timeout already checked -> remember values */ 1549 pgt->pgt_rqtp = rqtp; 1550 if (rqtp != NULL) { 1551 pgt->pgt_timecheck = timecheck; 1552 pgt->pgt_rqtime = *rqtp; 1553 } 1554 } 1555 if (blocking) 1556 /* timeout remaining */ 1557 pgt->pgt_loop = 1; 1558 } 1559 1560 /* set number of user event structures completed */ 1561 *nget = nevents; 1562 return (error); 1563 } 1564 1565 /* 1566 * 1. copy kernel event structure to user event structure. 1567 * 2. PORT_KEV_WIRED event structures will be reused by the "source" 1568 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue) 1569 * 4. Other types of event structures can be delivered back to the port cache 1570 * (port_free_event_local()). 1571 * 5. The event source callback function is the last opportunity for the 1572 * event source to update events, to free local resources associated with 1573 * the event or to deny the delivery of the event. 
1574 */ 1575 static int 1576 port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list) 1577 { 1578 int free_event = 0; 1579 int flags; 1580 int error; 1581 1582 puevp->portev_source = pkevp->portkev_source; 1583 puevp->portev_object = pkevp->portkev_object; 1584 puevp->portev_user = pkevp->portkev_user; 1585 puevp->portev_events = pkevp->portkev_events; 1586 1587 /* remove event from the queue */ 1588 list_remove(list, pkevp); 1589 1590 /* 1591 * Events of type PORT_KEV_WIRED remain allocated by the 1592 * event source. 1593 */ 1594 flags = pkevp->portkev_flags; 1595 if (pkevp->portkev_flags & PORT_KEV_WIRED) 1596 pkevp->portkev_flags &= ~PORT_KEV_DONEQ; 1597 else 1598 free_event = 1; 1599 1600 if (pkevp->portkev_callback) { 1601 error = (*pkevp->portkev_callback)(pkevp->portkev_arg, 1602 &puevp->portev_events, pkevp->portkev_pid, 1603 PORT_CALLBACK_DEFAULT, pkevp); 1604 1605 if (error) { 1606 /* 1607 * Event can not be delivered. 1608 * Caller must reinsert the event into the queue. 1609 */ 1610 pkevp->portkev_flags = flags; 1611 return (error); 1612 } 1613 } 1614 if (free_event) 1615 port_free_event_local(pkevp, 0); 1616 return (0); 1617 } 1618 1619 #ifdef _SYSCALL32_IMPL 1620 /* 1621 * 1. copy kernel event structure to user event structure. 1622 * 2. PORT_KEV_WIRED event structures will be reused by the "source" 1623 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue) 1624 * 4. Other types of event structures can be delivered back to the port cache 1625 * (port_free_event_local()). 1626 * 5. The event source callback function is the last opportunity for the 1627 * event source to update events, to free local resources associated with 1628 * the event or to deny the delivery of the event. 
1629 */ 1630 static int 1631 port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list) 1632 { 1633 int free_event = 0; 1634 int error; 1635 int flags; 1636 1637 puevp->portev_source = pkevp->portkev_source; 1638 puevp->portev_object = (daddr32_t)pkevp->portkev_object; 1639 puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user; 1640 puevp->portev_events = pkevp->portkev_events; 1641 1642 /* remove event from the queue */ 1643 list_remove(list, pkevp); 1644 1645 /* 1646 * Events if type PORT_KEV_WIRED remain allocated by the 1647 * sub-system (source). 1648 */ 1649 1650 flags = pkevp->portkev_flags; 1651 if (pkevp->portkev_flags & PORT_KEV_WIRED) 1652 pkevp->portkev_flags &= ~PORT_KEV_DONEQ; 1653 else 1654 free_event = 1; 1655 1656 if (pkevp->portkev_callback != NULL) { 1657 error = (*pkevp->portkev_callback)(pkevp->portkev_arg, 1658 &puevp->portev_events, pkevp->portkev_pid, 1659 PORT_CALLBACK_DEFAULT, pkevp); 1660 if (error) { 1661 /* 1662 * Event can not be delivered. 1663 * Caller must reinsert the event into the queue. 1664 */ 1665 pkevp->portkev_flags = flags; 1666 return (error); 1667 } 1668 } 1669 if (free_event) 1670 port_free_event_local(pkevp, 0); 1671 return (0); 1672 } 1673 #endif /* _SYSCALL32_IMPL */ 1674 1675 /* 1676 * copyout alert event. 
1677 */ 1678 static int 1679 port_get_alert(port_alert_t *pa, port_event_t *uevp) 1680 { 1681 model_t model = get_udatamodel(); 1682 1683 /* copyout alert event structures to user space */ 1684 if (model == DATAMODEL_NATIVE) { 1685 port_event_t uev; 1686 uev.portev_source = PORT_SOURCE_ALERT; 1687 uev.portev_object = pa->portal_object; 1688 uev.portev_events = pa->portal_events; 1689 uev.portev_user = pa->portal_user; 1690 if (copyout(&uev, uevp, sizeof (port_event_t))) 1691 return (EFAULT); 1692 #ifdef _SYSCALL32_IMPL 1693 } else { 1694 port_event32_t uev32; 1695 uev32.portev_source = PORT_SOURCE_ALERT; 1696 uev32.portev_object = (daddr32_t)pa->portal_object; 1697 uev32.portev_events = pa->portal_events; 1698 uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user; 1699 if (copyout(&uev32, uevp, sizeof (port_event32_t))) 1700 return (EFAULT); 1701 #endif /* _SYSCALL32_IMPL */ 1702 } 1703 return (0); 1704 } 1705 1706 /* 1707 * Check return conditions : 1708 * - pending port close(2) 1709 * - threads waiting for events 1710 */ 1711 static void 1712 port_check_return_cond(port_queue_t *portq) 1713 { 1714 ASSERT(MUTEX_HELD(&portq->portq_mutex)); 1715 portq->portq_thrcnt--; 1716 if (portq->portq_flags & PORTQ_CLOSE) { 1717 if (portq->portq_thrcnt == 0) 1718 cv_signal(&portq->portq_closecv); 1719 else 1720 cv_signal(&portq->portq_thread->portget_cv); 1721 } 1722 } 1723 1724 /* 1725 * The port_get_kevent() function returns 1726 * - the event located at the head of the queue if 'last' pointer is NULL 1727 * - the next event after the event pointed by 'last' 1728 * The caller of this function is responsible for the integrity of the queue 1729 * in use: 1730 * - port_getn() is using a temporary queue protected with port_block(). 1731 * - port_close_events() is working on the global event queue and protects 1732 * the queue with portq->portq_mutex. 
1733 */ 1734 port_kevent_t * 1735 port_get_kevent(list_t *list, port_kevent_t *last) 1736 { 1737 if (last == NULL) 1738 return (list_head(list)); 1739 else 1740 return (list_next(list, last)); 1741 } 1742 1743 /* 1744 * The port_get_timeout() function gets the timeout data from user space 1745 * and converts that info into a corresponding internal representation. 1746 * The kerneldata flag means that the timeout data is already loaded. 1747 */ 1748 static int 1749 port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp, 1750 int *blocking, int kerneldata) 1751 { 1752 model_t model = get_udatamodel(); 1753 1754 *rqtp = NULL; 1755 if (timeout == NULL) { 1756 *blocking = 1; 1757 return (0); 1758 } 1759 1760 if (kerneldata) { 1761 *rqtime = *timeout; 1762 } else { 1763 if (model == DATAMODEL_NATIVE) { 1764 if (copyin(timeout, rqtime, sizeof (*rqtime))) 1765 return (EFAULT); 1766 #ifdef _SYSCALL32_IMPL 1767 } else { 1768 timespec32_t wait_time_32; 1769 if (copyin(timeout, &wait_time_32, 1770 sizeof (wait_time_32))) 1771 return (EFAULT); 1772 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32); 1773 #endif /* _SYSCALL32_IMPL */ 1774 } 1775 } 1776 1777 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) { 1778 *blocking = 0; 1779 return (0); 1780 } 1781 1782 if (rqtime->tv_sec < 0 || 1783 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC) 1784 return (EINVAL); 1785 1786 *rqtp = rqtime; 1787 *blocking = 1; 1788 return (0); 1789 } 1790 1791 /* 1792 * port_queue_thread() 1793 * Threads requiring more events than available will be put in a wait queue. 1794 * There is a "thread wait queue" per port. 1795 * Threads requiring less events get a higher priority than others and they 1796 * will be awoken first. 
1797 */ 1798 static portget_t * 1799 port_queue_thread(port_queue_t *portq, uint_t nget) 1800 { 1801 portget_t *pgetp; 1802 portget_t *ttp; 1803 portget_t *htp; 1804 1805 pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP); 1806 pgetp->portget_nget = nget; 1807 pgetp->portget_pid = curproc->p_pid; 1808 if (portq->portq_thread == NULL) { 1809 /* first waiting thread */ 1810 portq->portq_thread = pgetp; 1811 portq->portq_nget = nget; 1812 pgetp->portget_prev = pgetp; 1813 pgetp->portget_next = pgetp; 1814 return (pgetp); 1815 } 1816 1817 /* 1818 * thread waiting for less events will be set on top of the queue. 1819 */ 1820 ttp = portq->portq_thread; 1821 htp = ttp; 1822 for (;;) { 1823 if (nget <= ttp->portget_nget) 1824 break; 1825 if (htp == ttp->portget_next) 1826 break; /* last event */ 1827 ttp = ttp->portget_next; 1828 } 1829 1830 /* add thread to the queue */ 1831 pgetp->portget_next = ttp; 1832 pgetp->portget_prev = ttp->portget_prev; 1833 ttp->portget_prev->portget_next = pgetp; 1834 ttp->portget_prev = pgetp; 1835 if (portq->portq_thread == ttp) 1836 portq->portq_thread = pgetp; 1837 portq->portq_nget = portq->portq_thread->portget_nget; 1838 return (pgetp); 1839 } 1840 1841 /* 1842 * Take thread out of the queue. 1843 */ 1844 static void 1845 port_dequeue_thread(port_queue_t *portq, portget_t *pgetp) 1846 { 1847 if (pgetp->portget_next == pgetp) { 1848 /* last (single) waiting thread */ 1849 portq->portq_thread = NULL; 1850 portq->portq_nget = 0; 1851 } else { 1852 pgetp->portget_prev->portget_next = pgetp->portget_next; 1853 pgetp->portget_next->portget_prev = pgetp->portget_prev; 1854 if (portq->portq_thread == pgetp) 1855 portq->portq_thread = pgetp->portget_next; 1856 portq->portq_nget = portq->portq_thread->portget_nget; 1857 } 1858 kmem_free(pgetp, sizeof (portget_t)); 1859 } 1860 1861 /* 1862 * Set up event port kstats. 
1863 */ 1864 static void 1865 port_kstat_init() 1866 { 1867 kstat_t *ksp; 1868 uint_t ndata; 1869 1870 ndata = sizeof (port_kstat) / sizeof (kstat_named_t); 1871 ksp = kstat_create("portfs", 0, "Event Ports", "misc", 1872 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL); 1873 if (ksp) { 1874 ksp->ks_data = &port_kstat; 1875 kstat_install(ksp); 1876 } 1877 } 1878