1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/systm.h> 29 #include <sys/cred.h> 30 #include <sys/modctl.h> 31 #include <sys/vfs.h> 32 #include <sys/vfs_opreg.h> 33 #include <sys/sysmacros.h> 34 #include <sys/cmn_err.h> 35 #include <sys/stat.h> 36 #include <sys/errno.h> 37 #include <sys/kmem.h> 38 #include <sys/file.h> 39 #include <sys/kstat.h> 40 #include <sys/port_impl.h> 41 #include <sys/task.h> 42 #include <sys/project.h> 43 44 /* 45 * Event Ports can be shared across threads or across processes. 46 * Every thread/process can use an own event port or a group of them 47 * can use a single port. A major request was also to get the ability 48 * to submit user-defined events to a port. The idea of the 49 * user-defined events is to use the event ports for communication between 50 * threads/processes (like message queues). User defined-events are queued 51 * in a port with the same priority as other event types. 52 * 53 * Events are delivered only once. 
The thread/process which is waiting 54 * for events with the "highest priority" (priority here is related to the 55 * internal strategy to wakeup waiting threads) will retrieve the event, 56 * all other threads/processes will not be notified. There is also 57 * the requirement to have events which should be submitted immediately 58 * to all "waiting" threads. That is the main task of the alert event. 59 * The alert event is submitted by the application to a port. The port 60 * changes from a standard mode to the alert mode. Now all waiting threads 61 * will be awaken immediately and they will return with the alert event. 62 * Threads trying to retrieve events from a port in alert mode will 63 * return immediately with the alert event. 64 * 65 * 66 * An event port is like a kernel queue, which accept events submitted from 67 * user level as well as events submitted from kernel sub-systems. Sub-systems 68 * able to submit events to a port are the so-called "event sources". 69 * Current event sources: 70 * PORT_SOURCE_AIO : events submitted per transaction completion from 71 * POSIX-I/O framework. 72 * PORT_SOURCE_TIMER : events submitted when a timer fires 73 * (see timer_create(3RT)). 74 * PORT_SOURCE_FD : events submitted per file descriptor (see poll(2)). 75 * PORT_SOURCE_ALERT : events submitted from user. This is not really a 76 * single event, this is actually a port mode 77 * (see port_alert(3c)). 78 * PORT_SOURCE_USER : events submitted by applications with 79 * port_send(3c) or port_sendn(3c). 80 * PORT_SOURCE_FILE : events submitted per file being watched for file 81 * change events (see port_create(3c). 82 * 83 * There is a user API implemented in the libc library as well as a 84 * kernel API implemented in port_subr.c in genunix. 85 * The available user API functions are: 86 * port_create() : create a port as a file descriptor of portfs file system 87 * The standard close(2) function closes a port. 
 *	port_associate() : associate a file descriptor with a port to be able to
 *			   retrieve events from that file descriptor.
 *	port_dissociate(): remove the association of a file descriptor with a port.
 *	port_alert()	 : set/unset a port in alert mode
 *	port_send()	 : send an event of type PORT_SOURCE_USER to a port
 *	port_sendn()	 : send an event of type PORT_SOURCE_USER to a list of ports
 *	port_get()	 : retrieve a single event from a port
 *	port_getn()	 : retrieve a list of events from a port
 *
 * The available kernel API functions are:
 *	port_alloc_event()   : allocate an event slot/structure of/from a port
 *	port_init_event()    : set event data in the event structure
 *	port_send_event()    : send event to a port
 *	port_free_event()    : deliver allocated slot/structure back to a port
 *	port_associate_ksource(): associate a kernel event source with a port
 *	port_dissociate_ksource(): dissociate a kernel event source from a port
 *
 * The libc implementation consists of small functions which pass the
 * arguments to the kernel using the "portfs" system call. It means, all the
 * synchronisation work is being done in the kernel. The "portfs" system
 * call loads the portfs file system into the kernel.
 *
 * PORT CREATION
 * The first function to be used is port_create() which internally creates
 * a vnode and a portfs node. The portfs node is represented by the port_t
 * structure, which again includes all the data necessary to control a port.
 * port_create() returns a file descriptor, which needs to be used in almost
 * all other event port functions.
 * The maximum number of ports per system is controlled by the resource
 * control: project.port-max-ids.
 *
 * EVENT GENERATION
 * The second step is the triggering of events, which could be sent to a port.
 * Every event source implements its own method to generate events for a port:
 * PORT_SOURCE_AIO:
 *	The sigevent structure of the standard POSIX-IO functions
 *	was extended by an additional notification type.
 *	Standard notification types:
 *		SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD
 *	Event ports introduced now SIGEV_PORT.
 *	The notification type SIGEV_PORT specifies that a structure
 *	of type port_notify_t has to be attached to the sigev_value.
 *	The port_notify_t structure contains the event port file
 *	descriptor and a user-defined pointer.
 *	Internally the AIO implementation will use the kernel API
 *	functions to allocate an event port slot per transaction (aiocb)
 *	and send the event to the port as soon as the transaction completes.
 *	All the events submitted per transaction are of type
 *	PORT_SOURCE_AIO.
 * PORT_SOURCE_TIMER:
 *	The timer_create() function uses the same method as the
 *	PORT_SOURCE_AIO event source. It also uses the sigevent structure
 *	to deliver the port information.
 *	Internally the timer code will allocate a single event slot/struct
 *	per timer and it will send the timer event as soon as the timer
 *	fires. If the timer-fired event is not delivered to the application
 *	before the next period elapsed, then an overrun counter will be
 *	incremented. The timer event source uses a callback function to
 *	detect the delivery of the event to the application. At that time
 *	the timer callback function will update the event overrun counter.
 * PORT_SOURCE_FD:
 *	This event source uses the port_associate() function to allocate
 *	an event slot/struct from a port. The application defines in the
 *	events argument of port_associate() the type of events which it is
 *	interested in.
153 * The internal pollwakeup() function is used by all the file 154 * systems --which are supporting the VOP_POLL() interface- to notify 155 * the upper layer (poll(2), devpoll(7d) and now event ports) about 156 * the event triggered (see valid events in poll(2)). 157 * The pollwakeup() function forwards the event to the layer registered 158 * to receive the current event. 159 * The port_dissociate() function can be used to free the allocated 160 * event slot from the port. Anyway, file descriptors deliver events 161 * only one time and remain deactivated until the application 162 * reactivates the association of a file descriptor with port_associate(). 163 * If an associated file descriptor is closed then the file descriptor 164 * will be dissociated automatically from the port. 165 * 166 * PORT_SOURCE_ALERT: 167 * This event type is generated when the port was previously set in 168 * alert mode using the port_alert() function. 169 * A single alert event is delivered to every thread which tries to 170 * retrieve events from a port. 171 * PORT_SOURCE_USER: 172 * This type of event is generated from user level using the port_send() 173 * function to send a user event to a port or the port_sendn() function 174 * to send an event to a list of ports. 175 * PORT_SOURCE_FILE: 176 * This event source uses the port_associate() interface to register 177 * a file to be monitored for changes. The file name that needs to be 178 * monitored is specified in the file_obj_t structure, a pointer to which 179 * is passed as an argument. The event types to be monitored are specified 180 * in the events argument. 181 * A file events monitor is represented internal per port per object 182 * address(the file_obj_t pointer). Which means there can be multiple 183 * watches registered on the same file using different file_obj_t 184 * structure pointer. With the help of the FEM(File Event Monitoring) 185 * hooks, the file's vnode ops are intercepted and relevant events 186 * delivered. 
The port_dissociate() function is used to de-register a 187 * file events monitor on a file. When the specified file is 188 * removed/renamed, the file events watch/monitor is automatically 189 * removed. 190 * 191 * EVENT DELIVERY / RETRIEVING EVENTS 192 * Events remain in the port queue until: 193 * - the application uses port_get() or port_getn() to retrieve events, 194 * - the event source cancel the event, 195 * - the event port is closed or 196 * - the process exits. 197 * The maximal number of events in a port queue is the maximal number 198 * of event slots/structures which can be allocated by event sources. 199 * The allocation of event slots/structures is controlled by the resource 200 * control: process.port-max-events. 201 * The port_get() function retrieves a single event and the port_getn() 202 * function retrieves a list of events. 203 * Events are classified as shareable and non-shareable events across processes. 204 * Non-shareable events are invisible for the port_get(n)() functions of 205 * processes other than the owner of the event. 206 * Shareable event types are: 207 * PORT_SOURCE_USER events 208 * This type of event is unconditionally shareable and without 209 * limitations. If the parent process sends a user event and closes 210 * the port afterwards, the event remains in the port and the child 211 * process will still be able to retrieve the user event. 212 * PORT_SOURCE_ALERT events 213 * This type of event is shareable between processes. 214 * Limitation: The alert mode of the port is removed if the owner 215 * (process which set the port in alert mode) of the 216 * alert event closes the port. 217 * PORT_SOURCE_FD events 218 * This type of event is conditional shareable between processes. 219 * After fork(2) all forked file descriptors are shareable between 220 * the processes. The child process is allowed to retrieve events 221 * from the associated file descriptors and it can also re-associate 222 * the fd with the port. 
223 * Limitations: The child process is not allowed to dissociate 224 * the file descriptor from the port. Only the 225 * owner (process) of the association is allowed to 226 * dissociate the file descriptor from the port. 227 * If the owner of the association closes the port 228 * the association will be removed. 229 * PORT_SOURCE_AIO events 230 * This type of event is not shareable between processes. 231 * PORT_SOURCE_TIMER events 232 * This type of event is not shareable between processes. 233 * PORT_SOURCE_FILE events 234 * This type of event is not shareable between processes. 235 * 236 * FORK BEHAVIOUR 237 * On fork(2) the child process inherits all opened file descriptors from 238 * the parent process. This is also valid for port file descriptors. 239 * Associated file descriptors with a port maintain the association across the 240 * fork(2). It means, the child process gets full access to the port and 241 * it can retrieve events from all common associated file descriptors. 242 * Events of file descriptors created and associated with a port after the 243 * fork(2) are non-shareable and can only be retrieved by the same process. 244 * 245 * If the parent or the child process closes an exported port (using fork(2) 246 * or I_SENDFD) all the file descriptors associated with the port by the 247 * process will be dissociated from the port. Events of dissociated file 248 * descriptors as well as all non-shareable events will be discarded. 249 * The other process can continue working with the port as usual. 250 * 251 * CLOSING A PORT 252 * close(2) has to be used to close a port. See FORK BEHAVIOUR for details. 253 * 254 * PORT EVENT STRUCTURES 255 * The global control structure of the event ports framework is port_control_t. 256 * port_control_t keeps track of the number of created ports in the system. 257 * The cache of the port event structures is also located in port_control_t. 258 * 259 * On port_create() the vnode and the portfs node is also created. 
260 * The portfs node is represented by the port_t structure. 261 * The port_t structure manages all port specific tasks: 262 * - management of resource control values 263 * - port VOP_POLL interface 264 * - creation time 265 * - uid and gid of the port 266 * 267 * The port_t structure contains the port_queue_t structure. 268 * The port_queue_t structure contains all the data necessary for the 269 * queue management: 270 * - locking 271 * - condition variables 272 * - event counters 273 * - submitted events (represented by port_kevent_t structures) 274 * - threads waiting for event delivery (check portget_t structure) 275 * - PORT_SOURCE_FD cache (managed by the port_fdcache_t structure) 276 * - event source management (managed by the port_source_t structure) 277 * - alert mode management (check port_alert_t structure) 278 * 279 * EVENT MANAGEMENT 280 * The event port file system creates a kmem_cache for internal allocation of 281 * event port structures. 282 * 283 * 1. Event source association with a port: 284 * The first step to do for event sources is to get associated with a port 285 * using the port_associate_ksource() function or adding an entry to the 286 * port_ksource_tab[]. An event source can get dissociated from a port 287 * using the port_dissociate_ksource() function. An entry in the 288 * port_ksource_tab[] implies that the source will be associated 289 * automatically with every new created port. 290 * The event source can deliver a callback function, which is used by the 291 * port to notify the event source about close(2). The idea is that 292 * in such a case the event source should free all allocated resources 293 * and it must return to the port all allocated slots/structures. 294 * The port_close() function will wait until all allocated event 295 * structures/slots are returned to the port. 
 * The callback function is not necessary when the event source does not
 * maintain local resources, a second condition is that the event source
 * can guarantee that allocated event slots will be returned without
 * delay to the port (it will not block and sleep somewhere).
 *
 * 2. Reservation of an event slot / event structure
 * The event port reliability is based on the reservation of an event "slot"
 * (allocation of an event structure) by the event source as part of the
 * application call. If the maximal number of event slots is exhausted then
 * the event source can return a corresponding error code to the application.
 *
 * The port_alloc_event() function has to be used by event sources to
 * allocate an event slot (reserve an event structure). The port_alloc_event()
 * function does not block and it will return a 0 value on success or an
 * error code if it fails.
 * An argument of port_alloc_event() is a flag which determines the behavior
 * of the event after it was delivered to the application:
 * PORT_ALLOC_DEFAULT	: event slot becomes free after delivery to the
 *			  application.
 * PORT_ALLOC_PRIVATE	: event slot remains under the control of the event
 *			  source. This kind of slots can not be used for
 *			  event delivery and should only be used internally
 *			  by the event source.
 * PORT_KEV_CACHED	: event slot remains under the control of an event
 *			  port cache. It does not become free after delivery
 *			  to the application.
 * PORT_ALLOC_SCACHED	: event slot remains under the control of the event
 *			  source. The event source takes the control over
 *			  the slot after the event is delivered to the
 *			  application.
 *
 * 3. Delivery of events to the event port
 * Earlier allocated event structure/slot has to be used to deliver
 * event data to the port. Event source has to use the function
 * port_send_event().
The single argument is a pointer to the previously 331 * reserved event structure/slot. 332 * The portkev_events field of the port_kevent_t structure can be updated/set 333 * in two ways: 334 * 1. using the port_set_event() function, or 335 * 2. updating the portkev_events field out of the callback function: 336 * The event source can deliver a callback function to the port as an 337 * argument of port_init_event(). 338 * One of the arguments of the callback function is a pointer to the 339 * events field, which will be delivered to the application. 340 * (see Delivery of events to the application). 341 * Event structures/slots can be delivered to the event port only one time, 342 * they remain blocked until the data is delivered to the application and the 343 * slot becomes free or it is delivered back to the event source 344 * (PORT_ALLOC_SCACHED). The activation of the callback function mentioned above 345 * is at the same time the indicator for the event source that the event 346 * structure/slot is free for reuse. 347 * 348 * 4. Delivery of events to the application 349 * The events structures/slots delivered by event sources remain in the 350 * port queue until they are retrieved by the application or the port 351 * is closed (exit(2) also closes all opened file descriptors).. 352 * The application uses port_get() or port_getn() to retrieve events from 353 * a port. port_get() retrieves a single event structure/slot and port_getn() 354 * retrieves a list of event structures/slots. 355 * Both functions are able to poll for events and return immediately or they 356 * can specify a timeout value. 357 * Before the events are delivered to the application they are moved to a 358 * second temporary internal queue. The idea is to avoid lock collisions or 359 * contentions of the global queue lock. 360 * The global queue lock is used every time when an event source delivers 361 * new events to the port. 
362 * The port_get() and port_getn() functions 363 * a) retrieve single events from the temporary queue, 364 * b) prepare the data to be passed to the application memory, 365 * c) activate the callback function of the event sources: 366 * - to get the latest event data, 367 * - the event source can free all allocated resources associated with the 368 * current event, 369 * - the event source can re-use the current event slot/structure 370 * - the event source can deny the delivery of the event to the application 371 * (e.g. because of the wrong process). 372 * d) put the event back to the temporary queue if the event delivery was denied 373 * e) repeat a) until d) as long as there are events in the queue and 374 * there is enough user space available. 375 * 376 * The loop described above could block for a very long time the global mutex, 377 * to avoid that a second mutex was introduced to synchronized concurrent 378 * threads accessing the temporary queue. 379 */ 380 381 static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, 382 uintptr_t); 383 384 static struct sysent port_sysent = { 385 6, 386 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 387 (int (*)())portfs, 388 }; 389 390 static struct modlsys modlsys = { 391 &mod_syscallops, "event ports", &port_sysent 392 }; 393 394 #ifdef _SYSCALL32_IMPL 395 396 static int64_t 397 portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4, 398 uint32_t arg5, uint32_t arg6); 399 400 static struct sysent port_sysent32 = { 401 6, 402 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 403 (int (*)())portfs32, 404 }; 405 406 static struct modlsys modlsys32 = { 407 &mod_syscallops32, 408 "32-bit event ports syscalls", 409 &port_sysent32 410 }; 411 #endif /* _SYSCALL32_IMPL */ 412 413 static struct modlinkage modlinkage = { 414 MODREV_1, 415 &modlsys, 416 #ifdef _SYSCALL32_IMPL 417 &modlsys32, 418 #endif 419 NULL 420 }; 421 422 port_kstat_t port_kstat = { 423 { "ports", KSTAT_DATA_UINT32 } 424 }; 425 426 dev_t portdev; 427 struct 
vnodeops *port_vnodeops; 428 struct vfs port_vfs; 429 430 extern rctl_hndl_t rc_process_portev; 431 extern rctl_hndl_t rc_project_portids; 432 extern void aio_close_port(void *, int, pid_t, int); 433 434 /* 435 * This table contains a list of event sources which need a static 436 * association with a port (every port). 437 * The last NULL entry in the table is required to detect "end of table". 438 */ 439 struct port_ksource port_ksource_tab[] = { 440 {PORT_SOURCE_AIO, aio_close_port, NULL, NULL}, 441 {0, NULL, NULL, NULL} 442 }; 443 444 /* local functions */ 445 static int port_getn(port_t *, port_event_t *, uint_t, uint_t *, 446 port_gettimer_t *); 447 static int port_sendn(int [], int [], uint_t, int, void *, uint_t *); 448 static int port_alert(port_t *, int, int, void *); 449 static int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *); 450 static int port_send(port_t *, int, int, void *); 451 static int port_create(int *); 452 static int port_get_alert(port_alert_t *, port_event_t *); 453 static int port_copy_event(port_event_t *, port_kevent_t *, list_t *); 454 static int *port_errorn(int *, int, int, int); 455 static int port_noshare(void *, int *, pid_t, int, void *); 456 static int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *, 457 int); 458 static void port_init(port_t *); 459 static void port_remove_alert(port_queue_t *); 460 static void port_add_ksource_local(port_t *, port_ksource_t *); 461 static void port_check_return_cond(port_queue_t *); 462 static void port_dequeue_thread(port_queue_t *, portget_t *); 463 static portget_t *port_queue_thread(port_queue_t *, uint_t); 464 static void port_kstat_init(void); 465 466 #ifdef _SYSCALL32_IMPL 467 static int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *); 468 #endif 469 470 int 471 _init(void) 472 { 473 static const fs_operation_def_t port_vfsops_template[] = { 474 NULL, NULL 475 }; 476 extern const fs_operation_def_t port_vnodeops_template[]; 477 vfsops_t 
*port_vfsops; 478 int error; 479 major_t major; 480 481 if ((major = getudev()) == (major_t)-1) 482 return (ENXIO); 483 portdev = makedevice(major, 0); 484 485 /* Create a dummy vfs */ 486 error = vfs_makefsops(port_vfsops_template, &port_vfsops); 487 if (error) { 488 cmn_err(CE_WARN, "port init: bad vfs ops"); 489 return (error); 490 } 491 vfs_setops(&port_vfs, port_vfsops); 492 port_vfs.vfs_flag = VFS_RDONLY; 493 port_vfs.vfs_dev = portdev; 494 vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0); 495 496 error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops); 497 if (error) { 498 vfs_freevfsops(port_vfsops); 499 cmn_err(CE_WARN, "port init: bad vnode ops"); 500 return (error); 501 } 502 503 mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL); 504 port_control.pc_nents = 0; /* number of active ports */ 505 506 /* create kmem_cache for port event structures */ 507 port_control.pc_cache = kmem_cache_create("port_cache", 508 sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 509 510 port_kstat_init(); /* init port kstats */ 511 return (mod_install(&modlinkage)); 512 } 513 514 int 515 _info(struct modinfo *modinfop) 516 { 517 return (mod_info(&modlinkage, modinfop)); 518 } 519 520 /* 521 * System call wrapper for all port related system calls from 32-bit programs. 522 */ 523 #ifdef _SYSCALL32_IMPL 524 static int64_t 525 portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3, 526 uint32_t a4) 527 { 528 int64_t error; 529 530 switch (opcode & PORT_CODE_MASK) { 531 case PORT_GET: 532 error = portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4); 533 break; 534 case PORT_SENDN: 535 error = portfs(opcode, (uint32_t)a0, a1, a2, a3, a4); 536 break; 537 default: 538 error = portfs(opcode, a0, a1, a2, a3, a4); 539 break; 540 } 541 return (error); 542 } 543 #endif /* _SYSCALL32_IMPL */ 544 545 /* 546 * System entry point for port functions. 547 * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE). 
548 * The libc uses PORT_SYS_NOPORT in functions which do not deliver a 549 * port file descriptor as first argument. 550 */ 551 static int64_t 552 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, 553 uintptr_t a4) 554 { 555 rval_t r; 556 port_t *pp; 557 int error = 0; 558 uint_t nget; 559 file_t *fp; 560 port_gettimer_t port_timer; 561 562 r.r_vals = 0; 563 if (opcode & PORT_SYS_NOPORT) { 564 opcode &= PORT_CODE_MASK; 565 if (opcode == PORT_SENDN) { 566 error = port_sendn((int *)a0, (int *)a1, (uint_t)a2, 567 (int)a3, (void *)a4, (uint_t *)&r.r_val1); 568 if (error && (error != EIO)) 569 return ((int64_t)set_errno(error)); 570 return (r.r_vals); 571 } 572 573 if (opcode == PORT_CREATE) { 574 error = port_create(&r.r_val1); 575 if (error) 576 return ((int64_t)set_errno(error)); 577 return (r.r_vals); 578 } 579 } 580 581 /* opcodes using port as first argument (a0) */ 582 583 if ((fp = getf((int)a0)) == NULL) 584 return ((uintptr_t)set_errno(EBADF)); 585 586 if (fp->f_vnode->v_type != VPORT) { 587 releasef((int)a0); 588 return ((uintptr_t)set_errno(EBADFD)); 589 } 590 591 pp = VTOEP(fp->f_vnode); 592 593 switch (opcode & PORT_CODE_MASK) { 594 case PORT_GET: 595 { 596 /* see PORT_GETN description */ 597 struct timespec timeout; 598 599 port_timer.pgt_flags = PORTGET_ONE; 600 port_timer.pgt_loop = 0; 601 port_timer.pgt_rqtp = NULL; 602 if (a4 != 0) { 603 port_timer.pgt_timeout = &timeout; 604 timeout.tv_sec = (time_t)a2; 605 timeout.tv_nsec = (long)a3; 606 } else { 607 port_timer.pgt_timeout = NULL; 608 } 609 do { 610 nget = 1; 611 error = port_getn(pp, (port_event_t *)a1, 1, 612 (uint_t *)&nget, &port_timer); 613 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 614 break; 615 } 616 case PORT_GETN: 617 { 618 /* 619 * port_getn() can only retrieve own or shareable events from 620 * other processes. The port_getn() function remains in the 621 * kernel until own or shareable events are available or the 622 * timeout elapses. 
623 */ 624 port_timer.pgt_flags = 0; 625 port_timer.pgt_loop = 0; 626 port_timer.pgt_rqtp = NULL; 627 port_timer.pgt_timeout = (struct timespec *)a4; 628 do { 629 nget = a3; 630 error = port_getn(pp, (port_event_t *)a1, (uint_t)a2, 631 (uint_t *)&nget, &port_timer); 632 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 633 r.r_val1 = nget; 634 r.r_val2 = error; 635 releasef((int)a0); 636 if (error && error != ETIME) 637 return ((int64_t)set_errno(error)); 638 return (r.r_vals); 639 } 640 case PORT_ASSOCIATE: 641 { 642 switch ((int)a1) { 643 case PORT_SOURCE_FD: 644 error = port_associate_fd(pp, (int)a1, (uintptr_t)a2, 645 (int)a3, (void *)a4); 646 break; 647 case PORT_SOURCE_FILE: 648 error = port_associate_fop(pp, (int)a1, (uintptr_t)a2, 649 (int)a3, (void *)a4); 650 break; 651 default: 652 error = EINVAL; 653 break; 654 } 655 break; 656 } 657 case PORT_SEND: 658 { 659 /* user-defined events */ 660 error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2); 661 break; 662 } 663 case PORT_DISPATCH: 664 { 665 /* 666 * library events, blocking 667 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ 668 * are currently allowed. 
669 */ 670 if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) { 671 error = EINVAL; 672 break; 673 } 674 error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2, 675 (uintptr_t)a3, (void *)a4); 676 break; 677 } 678 case PORT_DISSOCIATE: 679 { 680 switch ((int)a1) { 681 case PORT_SOURCE_FD: 682 error = port_dissociate_fd(pp, (uintptr_t)a2); 683 break; 684 case PORT_SOURCE_FILE: 685 error = port_dissociate_fop(pp, (uintptr_t)a2); 686 break; 687 default: 688 error = EINVAL; 689 break; 690 } 691 break; 692 } 693 case PORT_ALERT: 694 { 695 if ((int)a2) /* a2 = events */ 696 error = port_alert(pp, (int)a1, (int)a2, (void *)a3); 697 else 698 port_remove_alert(&pp->port_queue); 699 break; 700 } 701 default: 702 error = EINVAL; 703 break; 704 } 705 706 releasef((int)a0); 707 if (error) 708 return ((int64_t)set_errno(error)); 709 return (r.r_vals); 710 } 711 712 /* 713 * System call to create a port. 714 * 715 * The port_create() function creates a vnode of type VPORT per port. 716 * The port control data is associated with the vnode as vnode private data. 717 * The port_create() function returns an event port file descriptor. 718 */ 719 static int 720 port_create(int *fdp) 721 { 722 port_t *pp; 723 vnode_t *vp; 724 struct file *fp; 725 proc_t *p = curproc; 726 727 /* initialize vnode and port private data */ 728 pp = kmem_zalloc(sizeof (port_t), KM_SLEEP); 729 730 pp->port_vnode = vn_alloc(KM_SLEEP); 731 vp = EPTOV(pp); 732 vn_setops(vp, port_vnodeops); 733 vp->v_type = VPORT; 734 vp->v_vfsp = &port_vfs; 735 vp->v_data = (caddr_t)pp; 736 737 mutex_enter(&port_control.pc_mutex); 738 /* 739 * Retrieve the maximal number of event ports allowed per system from 740 * the resource control: project.port-max-ids. 
741 */ 742 mutex_enter(&p->p_lock); 743 if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p, 744 port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) { 745 mutex_exit(&p->p_lock); 746 vn_free(vp); 747 kmem_free(pp, sizeof (port_t)); 748 mutex_exit(&port_control.pc_mutex); 749 return (EAGAIN); 750 } 751 752 /* 753 * Retrieve the maximal number of events allowed per port from 754 * the resource control: process.port-max-events. 755 */ 756 pp->port_max_events = rctl_enforced_value(rc_process_portev, 757 p->p_rctls, p); 758 mutex_exit(&p->p_lock); 759 760 /* allocate a new user file descriptor and a file structure */ 761 if (falloc(vp, 0, &fp, fdp)) { 762 /* 763 * If the file table is full, free allocated resources. 764 */ 765 vn_free(vp); 766 kmem_free(pp, sizeof (port_t)); 767 mutex_exit(&port_control.pc_mutex); 768 return (EMFILE); 769 } 770 771 mutex_exit(&fp->f_tlock); 772 773 pp->port_fd = *fdp; 774 port_control.pc_nents++; 775 p->p_portcnt++; 776 port_kstat.pks_ports.value.ui32++; 777 mutex_exit(&port_control.pc_mutex); 778 779 /* initializes port private data */ 780 port_init(pp); 781 /* set user file pointer */ 782 setf(*fdp, fp); 783 return (0); 784 } 785 786 /* 787 * port_init() initializes event port specific data 788 */ 789 static void 790 port_init(port_t *pp) 791 { 792 port_queue_t *portq; 793 port_ksource_t *pks; 794 795 mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL); 796 portq = &pp->port_queue; 797 mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL); 798 pp->port_flags |= PORT_INIT; 799 800 /* 801 * If it is not enough memory available to satisfy a user 802 * request using a single port_getn() call then port_getn() 803 * will reduce the size of the list to PORT_MAX_LIST. 
804 */ 805 pp->port_max_list = port_max_list; 806 807 /* Set timestamp entries required for fstat(2) requests */ 808 gethrestime(&pp->port_ctime); 809 pp->port_uid = crgetuid(curproc->p_cred); 810 pp->port_gid = crgetgid(curproc->p_cred); 811 812 /* initialize port queue structs */ 813 list_create(&portq->portq_list, sizeof (port_kevent_t), 814 offsetof(port_kevent_t, portkev_node)); 815 list_create(&portq->portq_get_list, sizeof (port_kevent_t), 816 offsetof(port_kevent_t, portkev_node)); 817 portq->portq_flags = 0; 818 pp->port_pid = curproc->p_pid; 819 820 /* Allocate cache skeleton for PORT_SOURCE_FD events */ 821 portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP); 822 mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL); 823 824 /* 825 * Allocate cache skeleton for association of event sources. 826 */ 827 mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL); 828 portq->portq_scache = kmem_zalloc( 829 PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP); 830 831 /* 832 * pre-associate some kernel sources with this port. 833 * The pre-association is required to create port_source_t 834 * structures for object association. 835 * Some sources can not get associated with a port before the first 836 * object association is requested. Another reason to pre_associate 837 * a particular source with a port is because of performance. 838 */ 839 840 for (pks = port_ksource_tab; pks->pks_source != 0; pks++) 841 port_add_ksource_local(pp, pks); 842 } 843 844 /* 845 * The port_add_ksource_local() function is being used to associate 846 * event sources with every new port. 847 * The event sources need to be added to port_ksource_tab[]. 
848 */ 849 static void 850 port_add_ksource_local(port_t *pp, port_ksource_t *pks) 851 { 852 port_source_t *pse; 853 port_source_t **ps; 854 855 mutex_enter(&pp->port_queue.portq_source_mutex); 856 ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)]; 857 for (pse = *ps; pse != NULL; pse = pse->portsrc_next) { 858 if (pse->portsrc_source == pks->pks_source) 859 break; 860 } 861 862 if (pse == NULL) { 863 /* associate new source with the port */ 864 pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP); 865 pse->portsrc_source = pks->pks_source; 866 pse->portsrc_close = pks->pks_close; 867 pse->portsrc_closearg = pks->pks_closearg; 868 pse->portsrc_cnt = 1; 869 870 pks->pks_portsrc = pse; 871 if (*ps != NULL) 872 pse->portsrc_next = (*ps)->portsrc_next; 873 *ps = pse; 874 } 875 mutex_exit(&pp->port_queue.portq_source_mutex); 876 } 877 878 /* 879 * The port_send() function sends an event of type "source" to a 880 * port. This function is non-blocking. An event can be sent to 881 * a port as long as the number of events per port does not achieve the 882 * maximal allowed number of events. The max. number of events per port is 883 * defined by the resource control process.max-port-events. 884 * This function is used by the port library function port_send() 885 * and port_dispatch(). The port_send(3c) function is part of the 886 * event ports API and submits events of type PORT_SOURCE_USER. The 887 * port_dispatch() function is project private and it is used by library 888 * functions to submit events of other types than PORT_SOURCE_USER 889 * (e.g. PORT_SOURCE_AIO). 
890 */ 891 static int 892 port_send(port_t *pp, int source, int events, void *user) 893 { 894 port_kevent_t *pev; 895 int error; 896 897 error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev); 898 if (error) 899 return (error); 900 901 pev->portkev_object = 0; 902 pev->portkev_events = events; 903 pev->portkev_user = user; 904 pev->portkev_callback = NULL; 905 pev->portkev_arg = NULL; 906 pev->portkev_flags = 0; 907 908 port_send_event(pev); 909 return (0); 910 } 911 912 /* 913 * The port_noshare() function returns 0 if the current event was generated 914 * by the same process. Otherwise is returns a value other than 0 and the 915 * event should not be delivered to the current processe. 916 * The port_noshare() function is normally used by the port_dispatch() 917 * function. The port_dispatch() function is project private and can only be 918 * used within the event port project. 919 * Currently the libaio uses the port_dispatch() function to deliver events 920 * of types PORT_SOURCE_AIO. 921 */ 922 /* ARGSUSED */ 923 static int 924 port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp) 925 { 926 if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid) 927 return (1); 928 return (0); 929 } 930 931 /* 932 * The port_dispatch_event() function is project private and it is used by 933 * libraries involved in the project to deliver events to the port. 934 * port_dispatch will sleep and wait for enough resources to satisfy the 935 * request, if necessary. 936 * The library can specify if the delivered event is shareable with other 937 * processes (see PORT_SYS_NOSHARE flag). 
938 */ 939 static int 940 port_dispatch_event(port_t *pp, int opcode, int source, int events, 941 uintptr_t object, void *user) 942 { 943 port_kevent_t *pev; 944 int error; 945 946 error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev); 947 if (error) 948 return (error); 949 950 pev->portkev_object = object; 951 pev->portkev_events = events; 952 pev->portkev_user = user; 953 pev->portkev_arg = NULL; 954 if (opcode & PORT_SYS_NOSHARE) { 955 pev->portkev_flags = PORT_KEV_NOSHARE; 956 pev->portkev_callback = port_noshare; 957 } else { 958 pev->portkev_flags = 0; 959 pev->portkev_callback = NULL; 960 } 961 962 port_send_event(pev); 963 return (0); 964 } 965 966 967 /* 968 * The port_sendn() function is the kernel implementation of the event 969 * port API function port_sendn(3c). 970 * This function is able to send an event to a list of event ports. 971 */ 972 static int 973 port_sendn(int ports[], int errors[], uint_t nent, int events, void *user, 974 uint_t *nget) 975 { 976 port_kevent_t *pev; 977 int errorcnt = 0; 978 int error = 0; 979 int count; 980 int port; 981 int *plist; 982 int *elist = NULL; 983 file_t *fp; 984 port_t *pp; 985 986 if (nent == 0 || nent > port_max_list) 987 return (EINVAL); 988 989 plist = kmem_alloc(nent * sizeof (int), KM_SLEEP); 990 if (copyin((void *)ports, plist, nent * sizeof (int))) { 991 kmem_free(plist, nent * sizeof (int)); 992 return (EFAULT); 993 } 994 995 /* 996 * Scan the list for event port file descriptors and send the 997 * attached user event data embedded in a event of type 998 * PORT_SOURCE_USER to every event port in the list. 999 * If a list entry is not a valid event port then the corresponding 1000 * error code will be stored in the errors[] list with the same 1001 * list offset as in the ports[] list. 
1002 */ 1003 1004 for (count = 0; count < nent; count++) { 1005 port = plist[count]; 1006 if ((fp = getf(port)) == NULL) { 1007 elist = port_errorn(elist, nent, EBADF, count); 1008 errorcnt++; 1009 continue; 1010 } 1011 1012 pp = VTOEP(fp->f_vnode); 1013 if (fp->f_vnode->v_type != VPORT) { 1014 releasef(port); 1015 elist = port_errorn(elist, nent, EBADFD, count); 1016 errorcnt++; 1017 continue; 1018 } 1019 1020 error = port_alloc_event_local(pp, PORT_SOURCE_USER, 1021 PORT_ALLOC_DEFAULT, &pev); 1022 if (error) { 1023 releasef(port); 1024 elist = port_errorn(elist, nent, error, count); 1025 errorcnt++; 1026 continue; 1027 } 1028 1029 pev->portkev_object = 0; 1030 pev->portkev_events = events; 1031 pev->portkev_user = user; 1032 pev->portkev_callback = NULL; 1033 pev->portkev_arg = NULL; 1034 pev->portkev_flags = 0; 1035 1036 port_send_event(pev); 1037 releasef(port); 1038 } 1039 if (errorcnt) { 1040 error = EIO; 1041 if (copyout(elist, (void *)errors, nent * sizeof (int))) 1042 error = EFAULT; 1043 kmem_free(elist, nent * sizeof (int)); 1044 } 1045 *nget = nent - errorcnt; 1046 kmem_free(plist, nent * sizeof (int)); 1047 return (error); 1048 } 1049 1050 static int * 1051 port_errorn(int *elist, int nent, int error, int index) 1052 { 1053 if (elist == NULL) 1054 elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP); 1055 elist[index] = error; 1056 return (elist); 1057 } 1058 1059 /* 1060 * port_alert() 1061 * The port_alert() funcion is a high priority event and it is always set 1062 * on top of the queue. It is also delivered as single event. 
1063 * flags: 1064 * - SET :overwrite current alert data 1065 * - UPDATE:set alert data or return EBUSY if alert mode is already set 1066 * 1067 * - set the ALERT flag 1068 * - wakeup all sleeping threads 1069 */ 1070 static int 1071 port_alert(port_t *pp, int flags, int events, void *user) 1072 { 1073 port_queue_t *portq; 1074 portget_t *pgetp; 1075 port_alert_t *pa; 1076 1077 if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID) 1078 return (EINVAL); 1079 1080 portq = &pp->port_queue; 1081 pa = &portq->portq_alert; 1082 mutex_enter(&portq->portq_mutex); 1083 1084 /* check alert conditions */ 1085 if (flags == PORT_ALERT_UPDATE) { 1086 if (portq->portq_flags & PORTQ_ALERT) { 1087 mutex_exit(&portq->portq_mutex); 1088 return (EBUSY); 1089 } 1090 } 1091 1092 /* 1093 * Store alert data in the port to be delivered to threads 1094 * which are using port_get(n) to retrieve events. 1095 */ 1096 1097 portq->portq_flags |= PORTQ_ALERT; 1098 pa->portal_events = events; /* alert info */ 1099 pa->portal_pid = curproc->p_pid; /* process owner */ 1100 pa->portal_object = 0; /* no object */ 1101 pa->portal_user = user; /* user alert data */ 1102 1103 /* alert and deliver alert data to waiting threads */ 1104 pgetp = portq->portq_thread; 1105 if (pgetp == NULL) { 1106 /* no threads waiting for events */ 1107 mutex_exit(&portq->portq_mutex); 1108 return (0); 1109 } 1110 1111 /* 1112 * Set waiting threads in alert mode (PORTGET_ALERT).. 1113 * Every thread waiting for events already allocated a portget_t 1114 * structure to sleep on. 1115 * The port alert arguments are stored in the portget_t structure. 1116 * The PORTGET_ALERT flag is set to indicate the thread to return 1117 * immediately with the alert event. 
1118 */ 1119 do { 1120 if ((pgetp->portget_state & PORTGET_ALERT) == 0) { 1121 pa = &pgetp->portget_alert; 1122 pa->portal_events = events; 1123 pa->portal_object = 0; 1124 pa->portal_user = user; 1125 pgetp->portget_state |= PORTGET_ALERT; 1126 cv_signal(&pgetp->portget_cv); 1127 } 1128 } while ((pgetp = pgetp->portget_next) != portq->portq_thread); 1129 mutex_exit(&portq->portq_mutex); 1130 return (0); 1131 } 1132 1133 /* 1134 * Clear alert state of the port 1135 */ 1136 static void 1137 port_remove_alert(port_queue_t *portq) 1138 { 1139 mutex_enter(&portq->portq_mutex); 1140 portq->portq_flags &= ~PORTQ_ALERT; 1141 mutex_exit(&portq->portq_mutex); 1142 } 1143 1144 /* 1145 * The port_getn() function is used to retrieve events from a port. 1146 * 1147 * The port_getn() function returns immediately if there are enough events 1148 * available in the port to satisfy the request or if the port is in alert 1149 * mode (see port_alert(3c)). 1150 * The timeout argument of port_getn(3c) -which is embedded in the 1151 * port_gettimer_t structure- specifies if the system call should block or if it 1152 * should return immediately depending on the number of events available. 1153 * This function is internally used by port_getn(3c) as well as by 1154 * port_get(3c). 
*/
/*
 * Arguments as used by the code below:
 *	pp	port to retrieve events from
 *	uevp	user buffer receiving the events (target of copyout())
 *	max	capacity of uevp in events; max == 0 only reports the
 *		number of queued events in *nget
 *	nget	in: number of events the caller wants to wait for
 *		out: number of events copied out (or queued, if max == 0)
 *	pgt	timeout description plus the state (pgt_loop, pgt_rqtp,
 *		pgt_rqtime, pgt_timecheck) remembered for a re-entry of the
 *		same thread
 * Returns 0 or an errno value (EINVAL, EFAULT, EBADFD, EINTR, ETIME).
 */
static int
port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget,
    port_gettimer_t *pgt)
{
	port_queue_t	*portq;
	port_kevent_t	*pev;
	port_kevent_t	*lev;
	int		error = 0;
	uint_t		nmax;
	uint_t		nevents;
	uint_t		eventsz;
	port_event_t	*kevp;
	list_t		*glist;
	uint_t		tnent;
	int		rval;
	int		blocking = -1;	/* -1: timeout not evaluated yet */
	int		timecheck;
	int		flag;
	timespec_t	rqtime;
	timespec_t	*rqtp = NULL;
	portget_t	*pgetp;
	void		*results;
	model_t		model = get_udatamodel();

	flag = pgt->pgt_flags;

	/* the caller can not wait for more events than the buffer can hold */
	if (*nget > max && max > 0)
		return (EINVAL);

	portq = &pp->port_queue;
	mutex_enter(&portq->portq_mutex);
	if (max == 0) {
		/*
		 * Return number of objects with events.
		 * The port_block() call is required to synchronize this
		 * thread with another possible thread, which could be
		 * retrieving events from the port queue.
		 */
		port_block(portq);
		/*
		 * Check if a second thread is currently retrieving events
		 * and it is using the temporary event queue.
		 */
		if (portq->portq_tnent) {
			/* put remaining events back to the port queue */
			port_push_eventq(portq);
		}
		*nget = portq->portq_nent;
		port_unblock(portq);
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	if (uevp == NULL) {
		mutex_exit(&portq->portq_mutex);
		return (EFAULT);
	}
	if (*nget == 0) {		/* no events required */
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	/* port is being closed ... */
	if (portq->portq_flags & PORTQ_CLOSE) {
		mutex_exit(&portq->portq_mutex);
		return (EBADFD);
	}

	/* return immediately if port in alert mode */
	if (portq->portq_flags & PORTQ_ALERT) {
		error = port_get_alert(&portq->portq_alert, uevp);
		if (error == 0)
			*nget = 1;
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

	/*
	 * From here on this thread is counted in portq_thrcnt; every
	 * return path below has to drop the count again (directly or
	 * through port_check_return_cond()).
	 */
	portq->portq_thrcnt++;

	/*
	 * Now check if the completed events satisfy the
	 * "wait" requirements of the current thread:
	 */

	if (pgt->pgt_loop) {
		/*
		 * loop entry of same thread
		 * pgt_loop is set when the current thread returns
		 * prematurely from this function. That could happen
		 * when a port is being shared between processes and
		 * this thread could not find events to return.
		 * A thread is not allowed to retrieve non-shareable
		 * events generated in other processes.
		 * PORTQ_WAIT_EVENTS is set when a thread already
		 * checked the current event queue and no new events
		 * are added to the queue.
		 */
		if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) &&
		    (portq->portq_nent >= *nget)) {
			/* some new events arrived ...check them */
			goto portnowait;
		}
		/* reuse the timeout computed during the previous entry */
		rqtp = pgt->pgt_rqtp;
		timecheck = pgt->pgt_timecheck;
		pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
	} else {
		/* check if enough events are available ... */
		if (portq->portq_nent >= *nget)
			goto portnowait;
		/*
		 * There are not enough events available to satisfy
		 * the request, check timeout value and wait for
		 * incoming events.
		 */
		error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp,
		    &blocking, flag);
		if (error) {
			port_check_return_cond(portq);
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		if (blocking == 0) /* don't block, check fired events */
			goto portnowait;

		if (rqtp != NULL) {
			timespec_t	now;
			/* convert the relative timeout into an absolute time */
			timecheck = timechanged;
			gethrestime(&now);
			timespecadd(rqtp, &now);
		}
		/*
		 * NOTE(review): timecheck stays unassigned when no timeout
		 * was given (rqtp == NULL); presumably cv_waituntil_sig()
		 * ignores it in that case -- confirm.
		 */
	}

	/* enqueue thread in the list of waiting threads */
	pgetp = port_queue_thread(portq, *nget);


	/* Wait here until return conditions met */
	for (;;) {
		if (pgetp->portget_state & PORTGET_ALERT) {
			/* reap alert event and return */
			error = port_get_alert(&pgetp->portget_alert, uevp);
			if (error)
				*nget = 0;
			else
				*nget = 1;
			port_dequeue_thread(&pp->port_queue, pgetp);
			portq->portq_thrcnt--;
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		/*
		 * Leave the loop if no other thread is currently
		 * retrieving events (portq_getn == 0), enough events are
		 * queued and -for a re-entered thread- new events arrived
		 * since the queue was checked the last time.
		 */

		if ((portq->portq_getn == 0) &&
		    ((portq)->portq_nent >= *nget) &&
		    (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) ||
		    !((portq)->portq_flags & PORTQ_WAIT_EVENTS)))
			break;

		if (portq->portq_flags & PORTQ_CLOSE) {
			error = EBADFD;
			break;
		}

		rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
		    rqtp, timecheck);

		/* rval == 0 is mapped to EINTR, rval < 0 to ETIME */
		if (rval <= 0) {
			error = (rval == 0) ? EINTR : ETIME;
			break;
		}
	}

	/* take thread out of the wait queue */
	port_dequeue_thread(portq, pgetp);

	/*
	 * ETIME only terminates the call here if "flag" is set; otherwise
	 * the thread still collects the events which are available and
	 * ETIME is returned together with them at the end.
	 */
	if (error != 0 && (error == EINTR || error == EBADFD ||
	    (error == ETIME && flag))) {
		/* return without events */
		port_check_return_cond(portq);
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

portnowait:
	/*
	 * Move port event queue to a temporary event queue.
	 * New incoming events will continue to be posted to the event queue
	 * and they will not be considered by the current thread.
	 * The idea is to avoid lock contention or frequent locking/unlocking
	 * of the port queue mutex. The contention and performance degradation
	 * could happen because:
	 * a) incoming events use the port queue mutex to enqueue new events and
	 * b) before the event can be delivered to the application it is
	 *    necessary to notify the event sources about the event delivery.
	 *    Sometimes the event sources can require a long time to return and
	 *    the queue mutex would block incoming events.
	 * During this time incoming events (port_send_event()) do not need
	 * to awake threads waiting for events. Before the current thread
	 * returns it will check the conditions to awake other waiting threads.
	 */
	portq->portq_getn++;	/* number of threads retrieving events */
	port_block(portq);	/* block other threads here */
	nmax = max < portq->portq_nent ? max : portq->portq_nent;

	if (portq->portq_tnent) {
		/*
		 * Move remaining events from previous thread back to the
		 * port event queue.
		 */
		port_push_eventq(portq);
	}
	/* move port event queue to a temporary queue */
	list_move_tail(&portq->portq_get_list, &portq->portq_list);
	glist = &portq->portq_get_list;	/* use temporary event queue */
	tnent = portq->portq_nent;	/* get current number of events */
	portq->portq_nent = 0;		/* no events in the port event queue */
	portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */
	mutex_exit(&portq->portq_mutex);    /* event queue can be reused now */

	if (model == DATAMODEL_NATIVE) {
		eventsz = sizeof (port_event_t);
		/*
		 * Try to get a buffer for all events without sleeping; if
		 * that fails limit the request to port_max_list entries
		 * and wait for the memory.
		 */
		kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
		if (kevp == NULL) {
			if (nmax > pp->port_max_list)
				nmax = pp->port_max_list;
			kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
		}
		results = kevp;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event(&kevp[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process. port_copy_event()
				 * already removed it from the list, so put
				 * it back behind the last skipped event.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;  /* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#ifdef	_SYSCALL32_IMPL
	} else {
		port_event32_t	*kevp32;

		/* same procedure as above with the 32-bit event layout */
		eventsz = sizeof (port_event32_t);
		kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
		if (kevp32 == NULL) {
			if (nmax > pp->port_max_list)
				nmax = pp->port_max_list;
			kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
		}
		results = kevp32;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event32(&kevp32[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;  /* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#endif	/* _SYSCALL32_IMPL */
	}

	/*
	 * Remember number of remaining events in the temporary event queue.
	 */
	portq->portq_tnent = tnent - nevents;

	/*
	 * Work to do before return :
	 * - push list of remaining events back to the top of the standard
	 *   port queue.
	 * - if this is the last thread calling port_get(n) then wakeup the
	 *   thread waiting on close(2).
	 * - check for a deferred cv_signal from port_send_event() and wakeup
	 *   the sleeping thread.
	 */

	mutex_enter(&portq->portq_mutex);
	port_unblock(portq);
	if (portq->portq_tnent) {
		/*
		 * move remaining events in the temporary event queue back
		 * to the port event queue
		 */
		port_push_eventq(portq);
	}
	portq->portq_getn--;	/* update # of threads retrieving events */
	if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */
		/* Last thread => check close(2) conditions ... */
		if (portq->portq_flags & PORTQ_CLOSE) {
			cv_signal(&portq->portq_closecv);
			mutex_exit(&portq->portq_mutex);
			kmem_free(results, eventsz * nmax);
			/* do not copyout events */
			*nget = 0;
			return (EBADFD);
		}
	} else if (portq->portq_getn == 0) {
		/*
		 * no other threads retrieving events ...
		 * check wakeup conditions of sleeping threads
		 */
		if ((portq->portq_thread != NULL) &&
		    (portq->portq_nent >= portq->portq_nget))
			cv_signal(&portq->portq_thread->portget_cv);
	}

	/*
	 * Check PORTQ_POLLIN here because the current thread set temporarily
	 * the number of events in the queue to zero.
	 */
	if (portq->portq_flags & PORTQ_POLLIN) {
		portq->portq_flags &= ~PORTQ_POLLIN;
		mutex_exit(&portq->portq_mutex);
		pollwakeup(&pp->port_pollhd, POLLIN);
	} else {
		mutex_exit(&portq->portq_mutex);
	}

	/* now copyout list of user event structures to user space */
	if (nevents) {
		if (copyout(results, uevp, nevents * eventsz))
			error = EFAULT;
	}
	kmem_free(results, eventsz * nmax);

	if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) {
		/*
		 * no events retrieved: check loop conditions.
		 * The timeout state is saved in pgt so that a re-entry of
		 * this thread (pgt_loop set) continues with it.
		 */
		if (blocking == -1) {
			/* no timeout checked */
			error = port_get_timeout(pgt->pgt_timeout,
			    &pgt->pgt_rqtime, &rqtp, &blocking, flag);
			if (error) {
				*nget = nevents;
				return (error);
			}
			if (rqtp != NULL) {
				timespec_t	now;
				pgt->pgt_timecheck = timechanged;
				gethrestime(&now);
				timespecadd(&pgt->pgt_rqtime, &now);
			}
			pgt->pgt_rqtp = rqtp;
		} else {
			/* timeout already checked -> remember values */
			pgt->pgt_rqtp = rqtp;
			if (rqtp != NULL) {
				pgt->pgt_timecheck = timecheck;
				pgt->pgt_rqtime = *rqtp;
			}
		}
		if (blocking)
			/* timeout remaining */
			pgt->pgt_loop = 1;
	}

	/* set number of user event structures completed */
	*nget = nevents;
	return (error);
}

/*
 * 1. copy kernel event structure to user event structure.
 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
 * 4. Other types of event structures can be delivered back to the port cache
 *    (port_free_event_local()).
 * 5. The event source callback function is the last opportunity for the
 *    event source to update events, to free local resources associated with
 *    the event or to deny the delivery of the event.
1572 */ 1573 static int 1574 port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list) 1575 { 1576 int free_event = 0; 1577 int flags; 1578 int error; 1579 1580 puevp->portev_source = pkevp->portkev_source; 1581 puevp->portev_object = pkevp->portkev_object; 1582 puevp->portev_user = pkevp->portkev_user; 1583 puevp->portev_events = pkevp->portkev_events; 1584 1585 /* remove event from the queue */ 1586 list_remove(list, pkevp); 1587 1588 /* 1589 * Events of type PORT_KEV_WIRED remain allocated by the 1590 * event source. 1591 */ 1592 flags = pkevp->portkev_flags; 1593 if (pkevp->portkev_flags & PORT_KEV_WIRED) 1594 pkevp->portkev_flags &= ~PORT_KEV_DONEQ; 1595 else 1596 free_event = 1; 1597 1598 if (pkevp->portkev_callback) { 1599 error = (*pkevp->portkev_callback)(pkevp->portkev_arg, 1600 &puevp->portev_events, pkevp->portkev_pid, 1601 PORT_CALLBACK_DEFAULT, pkevp); 1602 1603 if (error) { 1604 /* 1605 * Event can not be delivered. 1606 * Caller must reinsert the event into the queue. 1607 */ 1608 pkevp->portkev_flags = flags; 1609 return (error); 1610 } 1611 } 1612 if (free_event) 1613 port_free_event_local(pkevp, 0); 1614 return (0); 1615 } 1616 1617 #ifdef _SYSCALL32_IMPL 1618 /* 1619 * 1. copy kernel event structure to user event structure. 1620 * 2. PORT_KEV_WIRED event structures will be reused by the "source" 1621 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue) 1622 * 4. Other types of event structures can be delivered back to the port cache 1623 * (port_free_event_local()). 1624 * 5. The event source callback function is the last opportunity for the 1625 * event source to update events, to free local resources associated with 1626 * the event or to deny the delivery of the event. 
1627 */ 1628 static int 1629 port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list) 1630 { 1631 int free_event = 0; 1632 int error; 1633 int flags; 1634 1635 puevp->portev_source = pkevp->portkev_source; 1636 puevp->portev_object = (daddr32_t)pkevp->portkev_object; 1637 puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user; 1638 puevp->portev_events = pkevp->portkev_events; 1639 1640 /* remove event from the queue */ 1641 list_remove(list, pkevp); 1642 1643 /* 1644 * Events if type PORT_KEV_WIRED remain allocated by the 1645 * sub-system (source). 1646 */ 1647 1648 flags = pkevp->portkev_flags; 1649 if (pkevp->portkev_flags & PORT_KEV_WIRED) 1650 pkevp->portkev_flags &= ~PORT_KEV_DONEQ; 1651 else 1652 free_event = 1; 1653 1654 if (pkevp->portkev_callback != NULL) { 1655 error = (*pkevp->portkev_callback)(pkevp->portkev_arg, 1656 &puevp->portev_events, pkevp->portkev_pid, 1657 PORT_CALLBACK_DEFAULT, pkevp); 1658 if (error) { 1659 /* 1660 * Event can not be delivered. 1661 * Caller must reinsert the event into the queue. 1662 */ 1663 pkevp->portkev_flags = flags; 1664 return (error); 1665 } 1666 } 1667 if (free_event) 1668 port_free_event_local(pkevp, 0); 1669 return (0); 1670 } 1671 #endif /* _SYSCALL32_IMPL */ 1672 1673 /* 1674 * copyout alert event. 
1675 */ 1676 static int 1677 port_get_alert(port_alert_t *pa, port_event_t *uevp) 1678 { 1679 model_t model = get_udatamodel(); 1680 1681 /* copyout alert event structures to user space */ 1682 if (model == DATAMODEL_NATIVE) { 1683 port_event_t uev; 1684 uev.portev_source = PORT_SOURCE_ALERT; 1685 uev.portev_object = pa->portal_object; 1686 uev.portev_events = pa->portal_events; 1687 uev.portev_user = pa->portal_user; 1688 if (copyout(&uev, uevp, sizeof (port_event_t))) 1689 return (EFAULT); 1690 #ifdef _SYSCALL32_IMPL 1691 } else { 1692 port_event32_t uev32; 1693 uev32.portev_source = PORT_SOURCE_ALERT; 1694 uev32.portev_object = (daddr32_t)pa->portal_object; 1695 uev32.portev_events = pa->portal_events; 1696 uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user; 1697 if (copyout(&uev32, uevp, sizeof (port_event32_t))) 1698 return (EFAULT); 1699 #endif /* _SYSCALL32_IMPL */ 1700 } 1701 return (0); 1702 } 1703 1704 /* 1705 * Check return conditions : 1706 * - pending port close(2) 1707 * - threads waiting for events 1708 */ 1709 static void 1710 port_check_return_cond(port_queue_t *portq) 1711 { 1712 ASSERT(MUTEX_HELD(&portq->portq_mutex)); 1713 portq->portq_thrcnt--; 1714 if (portq->portq_flags & PORTQ_CLOSE) { 1715 if (portq->portq_thrcnt == 0) 1716 cv_signal(&portq->portq_closecv); 1717 else 1718 cv_signal(&portq->portq_thread->portget_cv); 1719 } 1720 } 1721 1722 /* 1723 * The port_get_kevent() function returns 1724 * - the event located at the head of the queue if 'last' pointer is NULL 1725 * - the next event after the event pointed by 'last' 1726 * The caller of this function is responsible for the integrity of the queue 1727 * in use: 1728 * - port_getn() is using a temporary queue protected with port_block(). 1729 * - port_close_events() is working on the global event queue and protects 1730 * the queue with portq->portq_mutex. 
1731 */ 1732 port_kevent_t * 1733 port_get_kevent(list_t *list, port_kevent_t *last) 1734 { 1735 if (last == NULL) 1736 return (list_head(list)); 1737 else 1738 return (list_next(list, last)); 1739 } 1740 1741 /* 1742 * The port_get_timeout() function gets the timeout data from user space 1743 * and converts that info into a corresponding internal representation. 1744 * The kerneldata flag means that the timeout data is already loaded. 1745 */ 1746 static int 1747 port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp, 1748 int *blocking, int kerneldata) 1749 { 1750 model_t model = get_udatamodel(); 1751 1752 *rqtp = NULL; 1753 if (timeout == NULL) { 1754 *blocking = 1; 1755 return (0); 1756 } 1757 1758 if (kerneldata) { 1759 *rqtime = *timeout; 1760 } else { 1761 if (model == DATAMODEL_NATIVE) { 1762 if (copyin(timeout, rqtime, sizeof (*rqtime))) 1763 return (EFAULT); 1764 #ifdef _SYSCALL32_IMPL 1765 } else { 1766 timespec32_t wait_time_32; 1767 if (copyin(timeout, &wait_time_32, 1768 sizeof (wait_time_32))) 1769 return (EFAULT); 1770 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32); 1771 #endif /* _SYSCALL32_IMPL */ 1772 } 1773 } 1774 1775 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) { 1776 *blocking = 0; 1777 return (0); 1778 } 1779 1780 if (rqtime->tv_sec < 0 || 1781 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC) 1782 return (EINVAL); 1783 1784 *rqtp = rqtime; 1785 *blocking = 1; 1786 return (0); 1787 } 1788 1789 /* 1790 * port_queue_thread() 1791 * Threads requiring more events than available will be put in a wait queue. 1792 * There is a "thread wait queue" per port. 1793 * Threads requiring less events get a higher priority than others and they 1794 * will be awoken first. 
1795 */ 1796 static portget_t * 1797 port_queue_thread(port_queue_t *portq, uint_t nget) 1798 { 1799 portget_t *pgetp; 1800 portget_t *ttp; 1801 portget_t *htp; 1802 1803 pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP); 1804 pgetp->portget_nget = nget; 1805 pgetp->portget_pid = curproc->p_pid; 1806 if (portq->portq_thread == NULL) { 1807 /* first waiting thread */ 1808 portq->portq_thread = pgetp; 1809 portq->portq_nget = nget; 1810 pgetp->portget_prev = pgetp; 1811 pgetp->portget_next = pgetp; 1812 return (pgetp); 1813 } 1814 1815 /* 1816 * thread waiting for less events will be set on top of the queue. 1817 */ 1818 ttp = portq->portq_thread; 1819 htp = ttp; 1820 for (;;) { 1821 if (nget <= ttp->portget_nget) 1822 break; 1823 if (htp == ttp->portget_next) 1824 break; /* last event */ 1825 ttp = ttp->portget_next; 1826 } 1827 1828 /* add thread to the queue */ 1829 pgetp->portget_next = ttp; 1830 pgetp->portget_prev = ttp->portget_prev; 1831 ttp->portget_prev->portget_next = pgetp; 1832 ttp->portget_prev = pgetp; 1833 if (portq->portq_thread == ttp) 1834 portq->portq_thread = pgetp; 1835 portq->portq_nget = portq->portq_thread->portget_nget; 1836 return (pgetp); 1837 } 1838 1839 /* 1840 * Take thread out of the queue. 1841 */ 1842 static void 1843 port_dequeue_thread(port_queue_t *portq, portget_t *pgetp) 1844 { 1845 if (pgetp->portget_next == pgetp) { 1846 /* last (single) waiting thread */ 1847 portq->portq_thread = NULL; 1848 portq->portq_nget = 0; 1849 } else { 1850 pgetp->portget_prev->portget_next = pgetp->portget_next; 1851 pgetp->portget_next->portget_prev = pgetp->portget_prev; 1852 if (portq->portq_thread == pgetp) 1853 portq->portq_thread = pgetp->portget_next; 1854 portq->portq_nget = portq->portq_thread->portget_nget; 1855 } 1856 kmem_free(pgetp, sizeof (portget_t)); 1857 } 1858 1859 /* 1860 * Set up event port kstats. 
1861 */ 1862 static void 1863 port_kstat_init() 1864 { 1865 kstat_t *ksp; 1866 uint_t ndata; 1867 1868 ndata = sizeof (port_kstat) / sizeof (kstat_named_t); 1869 ksp = kstat_create("portfs", 0, "Event Ports", "misc", 1870 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL); 1871 if (ksp) { 1872 ksp->ks_data = &port_kstat; 1873 kstat_install(ksp); 1874 } 1875 } 1876