1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
29 * Copyright 2022 Oxide Computer Company
30 */
31
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/modctl.h>
36 #include <sys/vfs.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/sysmacros.h>
39 #include <sys/cmn_err.h>
40 #include <sys/stat.h>
41 #include <sys/errno.h>
42 #include <sys/kmem.h>
43 #include <sys/file.h>
44 #include <sys/kstat.h>
45 #include <sys/port_impl.h>
46 #include <sys/task.h>
47 #include <sys/project.h>
48
49 /*
50 * Event Ports can be shared across threads or across processes.
51 * Every thread/process can use an own event port or a group of them
52 * can use a single port. A major request was also to get the ability
53 * to submit user-defined events to a port. The idea of the
54 * user-defined events is to use the event ports for communication between
55 * threads/processes (like message queues). User defined-events are queued
56 * in a port with the same priority as other event types.
57 *
58 * Events are delivered only once. The thread/process which is waiting
59 * for events with the "highest priority" (priority here is related to the
60 * internal strategy to wakeup waiting threads) will retrieve the event,
61 * all other threads/processes will not be notified. There is also
62 * the requirement to have events which should be submitted immediately
63 * to all "waiting" threads. That is the main task of the alert event.
64 * The alert event is submitted by the application to a port. The port
65 * changes from a standard mode to the alert mode. Now all waiting threads
 * will be awakened immediately and they will return with the alert event.
67 * Threads trying to retrieve events from a port in alert mode will
68 * return immediately with the alert event.
69 *
70 *
71 * An event port is like a kernel queue, which accept events submitted from
72 * user level as well as events submitted from kernel sub-systems. Sub-systems
73 * able to submit events to a port are the so-called "event sources".
74 * Current event sources:
75 * PORT_SOURCE_AIO : events submitted per transaction completion from
76 * POSIX-I/O framework.
77 * PORT_SOURCE_TIMER : events submitted when a timer fires
78 * (see timer_create(3RT)).
79 * PORT_SOURCE_FD : events submitted per file descriptor (see poll(2)).
80 * PORT_SOURCE_ALERT : events submitted from user. This is not really a
81 * single event, this is actually a port mode
82 * (see port_alert(3c)).
83 * PORT_SOURCE_USER : events submitted by applications with
84 * port_send(3c) or port_sendn(3c).
85 * PORT_SOURCE_FILE : events submitted per file being watched for file
 *			    change events (see port_create(3c)).
87 *
88 * There is a user API implemented in the libc library as well as a
89 * kernel API implemented in port_subr.c in genunix.
90 * The available user API functions are:
91 * port_create() : create a port as a file descriptor of portfs file system
92 * The standard close(2) function closes a port.
93 * port_associate() : associate a file descriptor with a port to be able to
94 * retrieve events from that file descriptor.
95 * port_dissociate(): remove the association of a file descriptor with a port.
96 * port_alert() : set/unset a port in alert mode
97 * port_send() : send an event of type PORT_SOURCE_USER to a port
98 * port_sendn() : send an event of type PORT_SOURCE_USER to a list of ports
99 * port_get() : retrieve a single event from a port
100 * port_getn() : retrieve a list of events from a port
101 *
102 * The available kernel API functions are:
 *	port_alloc_event()    : allocate an event slot/structure of/from a port
104 * port_init_event() : set event data in the event structure
105 * port_send_event() : send event to a port
106 * port_free_event() : deliver allocated slot/structure back to a port
107 * port_associate_ksource(): associate a kernel event source with a port
108 * port_dissociate_ksource(): dissociate a kernel event source from a port
109 *
110 * The libc implementation consists of small functions which pass the
111 * arguments to the kernel using the "portfs" system call. It means, all the
112 * synchronisation work is being done in the kernel. The "portfs" system
113 * call loads the portfs file system into the kernel.
114 *
115 * PORT CREATION
116 * The first function to be used is port_create() which internally creates
117 * a vnode and a portfs node. The portfs node is represented by the port_t
118 * structure, which again includes all the data necessary to control a port.
119 * port_create() returns a file descriptor, which needs to be used in almost
120 * all other event port functions.
121 * The maximum number of ports per system is controlled by the resource
 * control: project.port-max-ids.
123 *
124 * EVENT GENERATION
125 * The second step is the triggering of events, which could be sent to a port.
126 * Every event source implements an own method to generate events for a port:
127 * PORT_SOURCE_AIO:
128 * The sigevent structure of the standard POSIX-IO functions
129 * was extended by an additional notification type.
130 * Standard notification types:
131 * SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD
132 * Event ports introduced now SIGEV_PORT.
133 * The notification type SIGEV_PORT specifies that a structure
134 * of type port_notify_t has to be attached to the sigev_value.
135 * The port_notify_t structure contains the event port file
136 * descriptor and a user-defined pointer.
137 * Internally the AIO implementation will use the kernel API
138 * functions to allocate an event port slot per transaction (aiocb)
 *		and send the event to the port as soon as the transaction completes.
140 * All the events submitted per transaction are of type
141 * PORT_SOURCE_AIO.
142 * PORT_SOURCE_TIMER:
143 * The timer_create() function uses the same method as the
144 * PORT_SOURCE_AIO event source. It also uses the sigevent structure
145 * to deliver the port information.
146 * Internally the timer code will allocate a single event slot/struct
147 * per timer and it will send the timer event as soon as the timer
148 * fires. If the timer-fired event is not delivered to the application
149 * before the next period elapsed, then an overrun counter will be
150 * incremented. The timer event source uses a callback function to
151 * detect the delivery of the event to the application. At that time
152 * the timer callback function will update the event overrun counter.
153 * PORT_SOURCE_FD:
154 * This event source uses the port_associate() function to allocate
155 * an event slot/struct from a port. The application defines in the
156 * events argument of port_associate() the type of events which it is
 *		interested in.
158 * The internal pollwakeup() function is used by all the file
 *		systems -- which are supporting the VOP_POLL() interface -- to notify
160 * the upper layer (poll(2), devpoll(4D) and now event ports) about
161 * the event triggered (see valid events in poll(2)).
162 * The pollwakeup() function forwards the event to the layer registered
163 * to receive the current event.
164 * The port_dissociate() function can be used to free the allocated
165 * event slot from the port. Anyway, file descriptors deliver events
166 * only one time and remain deactivated until the application
167 * reactivates the association of a file descriptor with port_associate().
168 * If an associated file descriptor is closed then the file descriptor
169 * will be dissociated automatically from the port.
170 *
171 * PORT_SOURCE_ALERT:
172 * This event type is generated when the port was previously set in
173 * alert mode using the port_alert() function.
174 * A single alert event is delivered to every thread which tries to
175 * retrieve events from a port.
176 * PORT_SOURCE_USER:
177 * This type of event is generated from user level using the port_send()
178 * function to send a user event to a port or the port_sendn() function
179 * to send an event to a list of ports.
180 * PORT_SOURCE_FILE:
181 * This event source uses the port_associate() interface to register
182 * a file to be monitored for changes. The file name that needs to be
183 * monitored is specified in the file_obj_t structure, a pointer to which
184 * is passed as an argument. The event types to be monitored are specified
185 * in the events argument.
 *		A file events monitor is represented internally per port per object
187 * address(the file_obj_t pointer). Which means there can be multiple
188 * watches registered on the same file using different file_obj_t
189 * structure pointer. With the help of the FEM(File Event Monitoring)
190 * hooks, the file's vnode ops are intercepted and relevant events
191 * delivered. The port_dissociate() function is used to de-register a
192 * file events monitor on a file. When the specified file is
193 * removed/renamed, the file events watch/monitor is automatically
194 * removed.
195 *
196 * EVENT DELIVERY / RETRIEVING EVENTS
197 * Events remain in the port queue until:
198 * - the application uses port_get() or port_getn() to retrieve events,
 *	- the event source cancels the event,
200 * - the event port is closed or
201 * - the process exits.
202 * The maximal number of events in a port queue is the maximal number
203 * of event slots/structures which can be allocated by event sources.
204 * The allocation of event slots/structures is controlled by the resource
205 * control: process.port-max-events.
206 * The port_get() function retrieves a single event and the port_getn()
207 * function retrieves a list of events.
208 * Events are classified as shareable and non-shareable events across processes.
209 * Non-shareable events are invisible for the port_get(n)() functions of
210 * processes other than the owner of the event.
211 * Shareable event types are:
212 * PORT_SOURCE_USER events
213 * This type of event is unconditionally shareable and without
214 * limitations. If the parent process sends a user event and closes
215 * the port afterwards, the event remains in the port and the child
216 * process will still be able to retrieve the user event.
217 * PORT_SOURCE_ALERT events
218 * This type of event is shareable between processes.
219 * Limitation: The alert mode of the port is removed if the owner
220 * (process which set the port in alert mode) of the
221 * alert event closes the port.
222 * PORT_SOURCE_FD events
223 * This type of event is conditional shareable between processes.
224 * After fork(2) all forked file descriptors are shareable between
225 * the processes. The child process is allowed to retrieve events
226 * from the associated file descriptors and it can also re-associate
227 * the fd with the port.
228 * Limitations: The child process is not allowed to dissociate
229 * the file descriptor from the port. Only the
230 * owner (process) of the association is allowed to
231 * dissociate the file descriptor from the port.
232 * If the owner of the association closes the port
233 * the association will be removed.
234 * PORT_SOURCE_AIO events
235 * This type of event is not shareable between processes.
236 * PORT_SOURCE_TIMER events
237 * This type of event is not shareable between processes.
238 * PORT_SOURCE_FILE events
239 * This type of event is not shareable between processes.
240 *
241 * FORK BEHAVIOUR
242 * On fork(2) the child process inherits all opened file descriptors from
243 * the parent process. This is also valid for port file descriptors.
244 * Associated file descriptors with a port maintain the association across the
245 * fork(2). It means, the child process gets full access to the port and
246 * it can retrieve events from all common associated file descriptors.
247 * Events of file descriptors created and associated with a port after the
248 * fork(2) are non-shareable and can only be retrieved by the same process.
249 *
250 * If the parent or the child process closes an exported port (using fork(2)
251 * or I_SENDFD) all the file descriptors associated with the port by the
252 * process will be dissociated from the port. Events of dissociated file
253 * descriptors as well as all non-shareable events will be discarded.
254 * The other process can continue working with the port as usual.
255 *
256 * CLOSING A PORT
257 * close(2) has to be used to close a port. See FORK BEHAVIOUR for details.
258 *
259 * PORT EVENT STRUCTURES
260 * The global control structure of the event ports framework is port_control_t.
261 * port_control_t keeps track of the number of created ports in the system.
262 * The cache of the port event structures is also located in port_control_t.
263 *
264 * On port_create() the vnode and the portfs node is also created.
265 * The portfs node is represented by the port_t structure.
266 * The port_t structure manages all port specific tasks:
267 * - management of resource control values
268 * - port VOP_POLL interface
269 * - creation time
270 * - uid and gid of the port
271 *
272 * The port_t structure contains the port_queue_t structure.
273 * The port_queue_t structure contains all the data necessary for the
274 * queue management:
275 * - locking
276 * - condition variables
277 * - event counters
278 * - submitted events (represented by port_kevent_t structures)
279 * - threads waiting for event delivery (check portget_t structure)
280 * - PORT_SOURCE_FD cache (managed by the port_fdcache_t structure)
281 * - event source management (managed by the port_source_t structure)
282 * - alert mode management (check port_alert_t structure)
283 *
284 * EVENT MANAGEMENT
285 * The event port file system creates a kmem_cache for internal allocation of
286 * event port structures.
287 *
288 * 1. Event source association with a port:
289 * The first step to do for event sources is to get associated with a port
290 * using the port_associate_ksource() function or adding an entry to the
291 * port_ksource_tab[]. An event source can get dissociated from a port
292 * using the port_dissociate_ksource() function. An entry in the
293 * port_ksource_tab[] implies that the source will be associated
294 * automatically with every new created port.
295 * The event source can deliver a callback function, which is used by the
296 * port to notify the event source about close(2). The idea is that
297 * in such a case the event source should free all allocated resources
298 * and it must return to the port all allocated slots/structures.
299 * The port_close() function will wait until all allocated event
300 * structures/slots are returned to the port.
301 * The callback function is not necessary when the event source does not
302 * maintain local resources, a second condition is that the event source
303 * can guarantee that allocated event slots will be returned without
304 * delay to the port (it will not block and sleep somewhere).
305 *
306 * 2. Reservation of an event slot / event structure
307 * The event port reliability is based on the reservation of an event "slot"
308 * (allocation of an event structure) by the event source as part of the
309 * application call. If the maximal number of event slots is exhausted then
310 * the event source can return a corresponding error code to the application.
311 *
312 * The port_alloc_event() function has to be used by event sources to
313 * allocate an event slot (reserve an event structure). The port_alloc_event()
 * does not block and it will return a 0 value on success or an error code
315 * if it fails.
316 * An argument of port_alloc_event() is a flag which determines the behavior
317 * of the event after it was delivered to the application:
318 * PORT_ALLOC_DEFAULT : event slot becomes free after delivery to the
319 * application.
320 * PORT_ALLOC_PRIVATE : event slot remains under the control of the event
321 * source. This kind of slots can not be used for
322 * event delivery and should only be used internally
323 * by the event source.
324 * PORT_KEV_CACHED : event slot remains under the control of an event
325 * port cache. It does not become free after delivery
326 * to the application.
327 * PORT_ALLOC_SCACHED : event slot remains under the control of the event
328 * source. The event source takes the control over
329 * the slot after the event is delivered to the
330 * application.
331 *
332 * 3. Delivery of events to the event port
333 * Earlier allocated event structure/slot has to be used to deliver
334 * event data to the port. Event source has to use the function
335 * port_send_event(). The single argument is a pointer to the previously
336 * reserved event structure/slot.
337 * The portkev_events field of the port_kevent_t structure can be updated/set
338 * in two ways:
339 * 1. using the port_set_event() function, or
340 * 2. updating the portkev_events field out of the callback function:
341 * The event source can deliver a callback function to the port as an
342 * argument of port_init_event().
343 * One of the arguments of the callback function is a pointer to the
344 * events field, which will be delivered to the application.
345 * (see Delivery of events to the application).
346 * Event structures/slots can be delivered to the event port only one time,
347 * they remain blocked until the data is delivered to the application and the
348 * slot becomes free or it is delivered back to the event source
349 * (PORT_ALLOC_SCACHED). The activation of the callback function mentioned above
350 * is at the same time the indicator for the event source that the event
351 * structure/slot is free for reuse.
352 *
353 * 4. Delivery of events to the application
354 * The events structures/slots delivered by event sources remain in the
355 * port queue until they are retrieved by the application or the port
 * is closed (exit(2) also closes all opened file descriptors).
357 * The application uses port_get() or port_getn() to retrieve events from
358 * a port. port_get() retrieves a single event structure/slot and port_getn()
359 * retrieves a list of event structures/slots.
360 * Both functions are able to poll for events and return immediately or they
361 * can specify a timeout value.
362 * Before the events are delivered to the application they are moved to a
363 * second temporary internal queue. The idea is to avoid lock collisions or
364 * contentions of the global queue lock.
365 * The global queue lock is used every time when an event source delivers
366 * new events to the port.
367 * The port_get() and port_getn() functions
368 * a) retrieve single events from the temporary queue,
369 * b) prepare the data to be passed to the application memory,
370 * c) activate the callback function of the event sources:
371 * - to get the latest event data,
372 * - the event source can free all allocated resources associated with the
373 * current event,
374 * - the event source can re-use the current event slot/structure
375 * - the event source can deny the delivery of the event to the application
376 * (e.g. because of the wrong process).
377 * d) put the event back to the temporary queue if the event delivery was denied
378 * e) repeat a) until d) as long as there are events in the queue and
379 * there is enough user space available.
380 *
381 * The loop described above could block for a very long time the global mutex,
 * to avoid that a second mutex was introduced to synchronize concurrent
383 * threads accessing the temporary queue.
384 */
385
/*
 * Forward declaration of the native system call handler. It is needed here
 * because the sysent entry below takes its address before the function is
 * defined further down in this file.
 */
static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
    uintptr_t);

/*
 * System call entry describing portfs(). The leading 6 matches the number
 * of arguments portfs() takes; the handler pointer is cast through
 * uintptr_t to the generic function pointer type stored in the entry.
 */
static struct sysent port_sysent = {
	6,
	SE_ARGC | SE_64RVAL | SE_NOUNLOAD,
	(int (*)())(uintptr_t)portfs,
};

/* Module linkage element tying the system call entry to mod_syscallops. */
static struct modlsys modlsys = {
	&mod_syscallops, "event ports", &port_sysent
};
398
#ifdef _SYSCALL32_IMPL

/*
 * Forward declaration of the wrapper used for 32-bit callers; it forwards
 * to portfs() after converting its arguments.
 */
static int64_t
portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4,
    uint32_t arg5, uint32_t arg6);

/*
 * System call entry for 32-bit programs; same argument count and flags as
 * the native entry, but with portfs32() as the handler.
 */
static struct sysent port_sysent32 = {
	6,
	SE_ARGC | SE_64RVAL | SE_NOUNLOAD,
	(int (*)())(uintptr_t)portfs32,
};

/* Module linkage element for the 32-bit system call entry. */
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit event ports syscalls",
	&port_sysent32
};
#endif /* _SYSCALL32_IMPL */
417
/*
 * Module linkage handed to mod_install() in _init() and to mod_info() in
 * _info(). The 32-bit element is only present when _SYSCALL32_IMPL is
 * defined; the list is terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
426
/*
 * Named kstat data of the event ports module. The "ports" value is
 * incremented by port_create() each time a port has been created.
 */
port_kstat_t port_kstat = {
	{ "ports",	KSTAT_DATA_UINT32 }
};

/* Device number of portfs; built in _init() from the major of getudev(). */
dev_t	portdev;
/* Vnode operations of port vnodes; created in _init() by vn_make_ops(). */
struct vnodeops *port_vnodeops;
/* Dummy vfs set up in _init(); every port vnode points to it (v_vfsp). */
struct vfs port_vfs;

/* Resource control handles and the AIO close callback, defined elsewhere. */
extern	rctl_hndl_t	rc_process_portev;	/* used for port_max_events */
extern	rctl_hndl_t	rc_project_portids;	/* tested in port_create() */
extern	void		aio_close_port(void *, int, pid_t, int);
438
/*
 * This table contains a list of event sources which need a static
 * association with a port (every port).
 * port_init() walks the table for each new port and hands every entry to
 * port_add_ksource_local(). The walk stops at the first entry whose source
 * is 0, so the last (all zero/NULL) entry is required to detect "end of
 * table".
 */
struct port_ksource port_ksource_tab[] = {
	{PORT_SOURCE_AIO,	aio_close_port,	NULL,	NULL},
	{0,			NULL,		NULL,	NULL}
};
448
/*
 * Forward declarations of the functions which are private to this file, so
 * that they can be used ahead of their definitions.
 */
static int	port_getn(port_t *, port_event_t *, uint_t, uint_t *,
	port_gettimer_t *);
static int	port_sendn(int [], int [], uint_t, int, void *, uint_t *);
static int	port_alert(port_t *, int, int, void *);
static int	port_dispatch_event(port_t *, int, int, int, uintptr_t, void *);
static int	port_send(port_t *, int, int, void *);
static int	port_create(int *);
static int	port_get_alert(port_alert_t *, port_event_t *);
static int	port_copy_event(port_event_t *, port_kevent_t *, list_t *);
static int	*port_errorn(int *, int, int, int);
static int	port_noshare(void *, int *, pid_t, int, void *);
static int	port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *,
	int);
static void	port_init(port_t *);
static void	port_remove_alert(port_queue_t *);
static void	port_add_ksource_local(port_t *, port_ksource_t *);
static void	port_check_return_cond(port_queue_t *);
static void	port_dequeue_thread(port_queue_t *, portget_t *);
static portget_t *port_queue_thread(port_queue_t *, uint_t);
static void	port_kstat_init(void);

/* Variant of port_copy_event() for the 32-bit event layout. */
#ifdef	_SYSCALL32_IMPL
static int	port_copy_event32(port_event32_t *, port_kevent_t *, list_t *);
#endif
474
475 int
_init(void)476 _init(void)
477 {
478 static const fs_operation_def_t port_vfsops_template[] = {
479 NULL, NULL
480 };
481 extern const fs_operation_def_t port_vnodeops_template[];
482 vfsops_t *port_vfsops;
483 int error;
484 major_t major;
485
486 if ((major = getudev()) == (major_t)-1)
487 return (ENXIO);
488 portdev = makedevice(major, 0);
489
490 /* Create a dummy vfs */
491 error = vfs_makefsops(port_vfsops_template, &port_vfsops);
492 if (error) {
493 cmn_err(CE_WARN, "port init: bad vfs ops");
494 return (error);
495 }
496 vfs_setops(&port_vfs, port_vfsops);
497 port_vfs.vfs_flag = VFS_RDONLY;
498 port_vfs.vfs_dev = portdev;
499 vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0);
500
501 error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops);
502 if (error) {
503 vfs_freevfsops(port_vfsops);
504 cmn_err(CE_WARN, "port init: bad vnode ops");
505 return (error);
506 }
507
508 mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL);
509 port_control.pc_nents = 0; /* number of active ports */
510
511 /* create kmem_cache for port event structures */
512 port_control.pc_cache = kmem_cache_create("port_cache",
513 sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
514
515 port_kstat_init(); /* init port kstats */
516 return (mod_install(&modlinkage));
517 }
518
519 int
_info(struct modinfo * modinfop)520 _info(struct modinfo *modinfop)
521 {
522 return (mod_info(&modlinkage, modinfop));
523 }
524
525 /*
526 * System call wrapper for all port related system calls from 32-bit programs.
527 */
528 #ifdef _SYSCALL32_IMPL
529 static int64_t
portfs32(uint32_t opcode,int32_t a0,uint32_t a1,uint32_t a2,uint32_t a3,uint32_t a4)530 portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3,
531 uint32_t a4)
532 {
533 int64_t error;
534
535 switch (opcode & PORT_CODE_MASK) {
536 case PORT_GET:
537 error = portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4);
538 break;
539 case PORT_SENDN:
540 error = portfs(opcode, (uint32_t)a0, a1, a2, a3, a4);
541 break;
542 default:
543 error = portfs(opcode, a0, a1, a2, a3, a4);
544 break;
545 }
546 return (error);
547 }
548 #endif /* _SYSCALL32_IMPL */
549
550 /*
551 * System entry point for port functions.
552 * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE).
553 * The libc uses PORT_SYS_NOPORT in functions which do not deliver a
554 * port file descriptor as first argument.
555 */
556 static int64_t
portfs(int opcode,uintptr_t a0,uintptr_t a1,uintptr_t a2,uintptr_t a3,uintptr_t a4)557 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3,
558 uintptr_t a4)
559 {
560 rval_t r;
561 port_t *pp;
562 int error = 0;
563 uint_t nget;
564 file_t *fp;
565 port_gettimer_t port_timer;
566
567 r.r_vals = 0;
568 if (opcode & PORT_SYS_NOPORT) {
569 opcode &= PORT_CODE_MASK;
570 if (opcode == PORT_SENDN) {
571 error = port_sendn((int *)a0, (int *)a1, (uint_t)a2,
572 (int)a3, (void *)a4, (uint_t *)&r.r_val1);
573 if (error && (error != EIO))
574 return ((int64_t)set_errno(error));
575 return (r.r_vals);
576 }
577
578 if (opcode == PORT_CREATE) {
579 error = port_create(&r.r_val1);
580 if (error)
581 return ((int64_t)set_errno(error));
582 return (r.r_vals);
583 }
584 }
585
586 /* opcodes using port as first argument (a0) */
587
588 if ((fp = getf((int)a0)) == NULL)
589 return ((uintptr_t)set_errno(EBADF));
590
591 if (fp->f_vnode->v_type != VPORT) {
592 releasef((int)a0);
593 return ((uintptr_t)set_errno(EBADFD));
594 }
595
596 pp = VTOEP(fp->f_vnode);
597
598 switch (opcode & PORT_CODE_MASK) {
599 case PORT_GET:
600 {
601 /* see PORT_GETN description */
602 struct timespec timeout;
603
604 port_timer.pgt_flags = PORTGET_ONE;
605 port_timer.pgt_loop = 0;
606 port_timer.pgt_rqtp = NULL;
607 if (a4 != 0) {
608 port_timer.pgt_timeout = &timeout;
609 timeout.tv_sec = (time_t)a2;
610 timeout.tv_nsec = (long)a3;
611 } else {
612 port_timer.pgt_timeout = NULL;
613 }
614 do {
615 nget = 1;
616 error = port_getn(pp, (port_event_t *)a1, 1,
617 (uint_t *)&nget, &port_timer);
618 } while (nget == 0 && error == 0 && port_timer.pgt_loop);
619 break;
620 }
621 case PORT_GETN:
622 {
623 /*
624 * port_getn() can only retrieve own or shareable events from
625 * other processes. The port_getn() function remains in the
626 * kernel until own or shareable events are available or the
627 * timeout elapses.
628 */
629 port_timer.pgt_flags = 0;
630 port_timer.pgt_loop = 0;
631 port_timer.pgt_rqtp = NULL;
632 port_timer.pgt_timeout = (struct timespec *)a4;
633 do {
634 nget = a3;
635 error = port_getn(pp, (port_event_t *)a1, (uint_t)a2,
636 (uint_t *)&nget, &port_timer);
637 } while (nget == 0 && error == 0 && port_timer.pgt_loop);
638 r.r_val1 = nget;
639 r.r_val2 = error;
640 releasef((int)a0);
641 if (error && error != ETIME)
642 return ((int64_t)set_errno(error));
643 return (r.r_vals);
644 }
645 case PORT_ASSOCIATE:
646 {
647 switch ((int)a1) {
648 case PORT_SOURCE_FD:
649 error = port_associate_fd(pp, (int)a1, (uintptr_t)a2,
650 (int)a3, (void *)a4);
651 break;
652 case PORT_SOURCE_FILE:
653 error = port_associate_fop(pp, (int)a1, (uintptr_t)a2,
654 (int)a3, (void *)a4);
655 break;
656 default:
657 error = EINVAL;
658 break;
659 }
660 break;
661 }
662 case PORT_SEND:
663 {
664 /* user-defined events */
665 error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2);
666 break;
667 }
668 case PORT_DISPATCH:
669 {
670 /*
671 * library events, blocking
672 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ
673 * are currently allowed.
674 */
675 if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) {
676 error = EINVAL;
677 break;
678 }
679 error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2,
680 (uintptr_t)a3, (void *)a4);
681 break;
682 }
683 case PORT_DISSOCIATE:
684 {
685 switch ((int)a1) {
686 case PORT_SOURCE_FD:
687 error = port_dissociate_fd(pp, (uintptr_t)a2);
688 break;
689 case PORT_SOURCE_FILE:
690 error = port_dissociate_fop(pp, (uintptr_t)a2);
691 break;
692 default:
693 error = EINVAL;
694 break;
695 }
696 break;
697 }
698 case PORT_ALERT:
699 {
700 if ((int)a2) /* a2 = events */
701 error = port_alert(pp, (int)a1, (int)a2, (void *)a3);
702 else
703 port_remove_alert(&pp->port_queue);
704 break;
705 }
706 default:
707 error = EINVAL;
708 break;
709 }
710
711 releasef((int)a0);
712 if (error)
713 return ((int64_t)set_errno(error));
714 return (r.r_vals);
715 }
716
717 /*
718 * System call to create a port.
719 *
720 * The port_create() function creates a vnode of type VPORT per port.
721 * The port control data is associated with the vnode as vnode private data.
722 * The port_create() function returns an event port file descriptor.
723 */
724 static int
port_create(int * fdp)725 port_create(int *fdp)
726 {
727 port_t *pp;
728 vnode_t *vp;
729 struct file *fp;
730 proc_t *p = curproc;
731
732 /* initialize vnode and port private data */
733 pp = kmem_zalloc(sizeof (port_t), KM_SLEEP);
734
735 pp->port_vnode = vn_alloc(KM_SLEEP);
736 vp = EPTOV(pp);
737 vn_setops(vp, port_vnodeops);
738 vp->v_type = VPORT;
739 vp->v_vfsp = &port_vfs;
740 vp->v_data = (caddr_t)pp;
741
742 mutex_enter(&port_control.pc_mutex);
743 /*
744 * Retrieve the maximal number of event ports allowed per system from
745 * the resource control: project.port-max-ids.
746 */
747 mutex_enter(&p->p_lock);
748 if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p,
749 port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) {
750 mutex_exit(&p->p_lock);
751 vn_free(vp);
752 kmem_free(pp, sizeof (port_t));
753 mutex_exit(&port_control.pc_mutex);
754 return (EAGAIN);
755 }
756
757 /*
758 * Retrieve the maximal number of events allowed per port from
759 * the resource control: process.port-max-events.
760 */
761 pp->port_max_events = rctl_enforced_value(rc_process_portev,
762 p->p_rctls, p);
763 mutex_exit(&p->p_lock);
764
765 /* allocate a new user file descriptor and a file structure */
766 if (falloc(vp, 0, &fp, fdp)) {
767 /*
768 * If the file table is full, free allocated resources.
769 */
770 vn_free(vp);
771 kmem_free(pp, sizeof (port_t));
772 mutex_exit(&port_control.pc_mutex);
773 return (EMFILE);
774 }
775
776 mutex_exit(&fp->f_tlock);
777
778 pp->port_fd = *fdp;
779 port_control.pc_nents++;
780 p->p_portcnt++;
781 port_kstat.pks_ports.value.ui32++;
782 mutex_exit(&port_control.pc_mutex);
783
784 /* initializes port private data */
785 port_init(pp);
786 /* set user file pointer */
787 setf(*fdp, fp);
788 return (0);
789 }
790
791 /*
792 * port_init() initializes event port specific data
793 */
794 static void
port_init(port_t * pp)795 port_init(port_t *pp)
796 {
797 port_queue_t *portq;
798 port_ksource_t *pks;
799
800 mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL);
801 portq = &pp->port_queue;
802 mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL);
803 pp->port_flags |= PORT_INIT;
804
805 /*
806 * If it is not enough memory available to satisfy a user
807 * request using a single port_getn() call then port_getn()
808 * will reduce the size of the list to PORT_MAX_LIST.
809 */
810 pp->port_max_list = port_max_list;
811
812 /* Set timestamp entries required for fstat(2) requests */
813 gethrestime(&pp->port_ctime);
814 pp->port_uid = crgetuid(curproc->p_cred);
815 pp->port_gid = crgetgid(curproc->p_cred);
816
817 /* initialize port queue structs */
818 list_create(&portq->portq_list, sizeof (port_kevent_t),
819 offsetof(port_kevent_t, portkev_node));
820 list_create(&portq->portq_get_list, sizeof (port_kevent_t),
821 offsetof(port_kevent_t, portkev_node));
822 portq->portq_flags = 0;
823 pp->port_pid = curproc->p_pid;
824
825 /* Allocate cache skeleton for PORT_SOURCE_FD events */
826 portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP);
827 mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL);
828 portq->portq_pcp->pc_flag = PC_PORTFS;
829
830 /*
831 * Allocate cache skeleton for association of event sources.
832 */
833 mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL);
834 portq->portq_scache = kmem_zalloc(
835 PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP);
836
837 /*
838 * pre-associate some kernel sources with this port.
839 * The pre-association is required to create port_source_t
840 * structures for object association.
841 * Some sources can not get associated with a port before the first
842 * object association is requested. Another reason to pre_associate
843 * a particular source with a port is because of performance.
844 */
845
846 for (pks = port_ksource_tab; pks->pks_source != 0; pks++)
847 port_add_ksource_local(pp, pks);
848 }
849
850 /*
851 * The port_add_ksource_local() function is being used to associate
852 * event sources with every new port.
853 * The event sources need to be added to port_ksource_tab[].
854 */
855 static void
port_add_ksource_local(port_t * pp,port_ksource_t * pks)856 port_add_ksource_local(port_t *pp, port_ksource_t *pks)
857 {
858 port_source_t *pse;
859 port_source_t **ps;
860
861 mutex_enter(&pp->port_queue.portq_source_mutex);
862 ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)];
863 for (pse = *ps; pse != NULL; pse = pse->portsrc_next) {
864 if (pse->portsrc_source == pks->pks_source)
865 break;
866 }
867
868 if (pse == NULL) {
869 /* associate new source with the port */
870 pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP);
871 pse->portsrc_source = pks->pks_source;
872 pse->portsrc_close = pks->pks_close;
873 pse->portsrc_closearg = pks->pks_closearg;
874 pse->portsrc_cnt = 1;
875
876 pks->pks_portsrc = pse;
877 if (*ps != NULL)
878 pse->portsrc_next = (*ps)->portsrc_next;
879 *ps = pse;
880 }
881 mutex_exit(&pp->port_queue.portq_source_mutex);
882 }
883
884 /*
885 * The port_send() function sends an event of type "source" to a
886 * port. This function is non-blocking. An event can be sent to
887 * a port as long as the number of events per port does not achieve the
888 * maximal allowed number of events. The max. number of events per port is
889 * defined by the resource control process.max-port-events.
890 * This function is used by the port library function port_send()
891 * and port_dispatch(). The port_send(3c) function is part of the
892 * event ports API and submits events of type PORT_SOURCE_USER. The
893 * port_dispatch() function is project private and it is used by library
894 * functions to submit events of other types than PORT_SOURCE_USER
895 * (e.g. PORT_SOURCE_AIO).
896 */
897 static int
port_send(port_t * pp,int source,int events,void * user)898 port_send(port_t *pp, int source, int events, void *user)
899 {
900 port_kevent_t *pev;
901 int error;
902
903 error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev);
904 if (error)
905 return (error);
906
907 pev->portkev_object = 0;
908 pev->portkev_events = events;
909 pev->portkev_user = user;
910 pev->portkev_callback = NULL;
911 pev->portkev_arg = NULL;
912 pev->portkev_flags = 0;
913
914 port_send_event(pev);
915 return (0);
916 }
917
918 /*
919 * The port_noshare() function returns 0 if the current event was generated
920 * by the same process. Otherwise is returns a value other than 0 and the
921 * event should not be delivered to the current processe.
922 * The port_noshare() function is normally used by the port_dispatch()
923 * function. The port_dispatch() function is project private and can only be
924 * used within the event port project.
925 * Currently the libaio uses the port_dispatch() function to deliver events
926 * of types PORT_SOURCE_AIO.
927 */
928 /* ARGSUSED */
929 static int
port_noshare(void * arg,int * events,pid_t pid,int flag,void * evp)930 port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp)
931 {
932 if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid)
933 return (1);
934 return (0);
935 }
936
937 /*
938 * The port_dispatch_event() function is project private and it is used by
939 * libraries involved in the project to deliver events to the port.
940 * port_dispatch will sleep and wait for enough resources to satisfy the
941 * request, if necessary.
942 * The library can specify if the delivered event is shareable with other
943 * processes (see PORT_SYS_NOSHARE flag).
944 */
945 static int
port_dispatch_event(port_t * pp,int opcode,int source,int events,uintptr_t object,void * user)946 port_dispatch_event(port_t *pp, int opcode, int source, int events,
947 uintptr_t object, void *user)
948 {
949 port_kevent_t *pev;
950 int error;
951
952 error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev);
953 if (error)
954 return (error);
955
956 pev->portkev_object = object;
957 pev->portkev_events = events;
958 pev->portkev_user = user;
959 pev->portkev_arg = NULL;
960 if (opcode & PORT_SYS_NOSHARE) {
961 pev->portkev_flags = PORT_KEV_NOSHARE;
962 pev->portkev_callback = port_noshare;
963 } else {
964 pev->portkev_flags = 0;
965 pev->portkev_callback = NULL;
966 }
967
968 port_send_event(pev);
969 return (0);
970 }
971
972
973 /*
974 * The port_sendn() function is the kernel implementation of the event
975 * port API function port_sendn(3c).
976 * This function is able to send an event to a list of event ports.
977 */
978 static int
port_sendn(int ports[],int errors[],uint_t nent,int events,void * user,uint_t * nget)979 port_sendn(int ports[], int errors[], uint_t nent, int events, void *user,
980 uint_t *nget)
981 {
982 port_kevent_t *pev;
983 int errorcnt = 0;
984 int error = 0;
985 int count;
986 int port;
987 int *plist;
988 int *elist = NULL;
989 file_t *fp;
990 port_t *pp;
991
992 if (nent == 0 || nent > port_max_list)
993 return (EINVAL);
994
995 plist = kmem_alloc(nent * sizeof (int), KM_SLEEP);
996 if (copyin((void *)ports, plist, nent * sizeof (int))) {
997 kmem_free(plist, nent * sizeof (int));
998 return (EFAULT);
999 }
1000
1001 /*
1002 * Scan the list for event port file descriptors and send the
1003 * attached user event data embedded in a event of type
1004 * PORT_SOURCE_USER to every event port in the list.
1005 * If a list entry is not a valid event port then the corresponding
1006 * error code will be stored in the errors[] list with the same
1007 * list offset as in the ports[] list.
1008 */
1009
1010 for (count = 0; count < nent; count++) {
1011 port = plist[count];
1012 if ((fp = getf(port)) == NULL) {
1013 elist = port_errorn(elist, nent, EBADF, count);
1014 errorcnt++;
1015 continue;
1016 }
1017
1018 pp = VTOEP(fp->f_vnode);
1019 if (fp->f_vnode->v_type != VPORT) {
1020 releasef(port);
1021 elist = port_errorn(elist, nent, EBADFD, count);
1022 errorcnt++;
1023 continue;
1024 }
1025
1026 error = port_alloc_event_local(pp, PORT_SOURCE_USER,
1027 PORT_ALLOC_DEFAULT, &pev);
1028 if (error) {
1029 releasef(port);
1030 elist = port_errorn(elist, nent, error, count);
1031 errorcnt++;
1032 continue;
1033 }
1034
1035 pev->portkev_object = 0;
1036 pev->portkev_events = events;
1037 pev->portkev_user = user;
1038 pev->portkev_callback = NULL;
1039 pev->portkev_arg = NULL;
1040 pev->portkev_flags = 0;
1041
1042 port_send_event(pev);
1043 releasef(port);
1044 }
1045 if (errorcnt) {
1046 error = EIO;
1047 if (copyout(elist, (void *)errors, nent * sizeof (int)))
1048 error = EFAULT;
1049 kmem_free(elist, nent * sizeof (int));
1050 }
1051 *nget = nent - errorcnt;
1052 kmem_free(plist, nent * sizeof (int));
1053 return (error);
1054 }
1055
1056 static int *
port_errorn(int * elist,int nent,int error,int index)1057 port_errorn(int *elist, int nent, int error, int index)
1058 {
1059 if (elist == NULL)
1060 elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP);
1061 elist[index] = error;
1062 return (elist);
1063 }
1064
1065 /*
1066 * port_alert()
1067 * The port_alert() funcion is a high priority event and it is always set
1068 * on top of the queue. It is also delivered as single event.
1069 * flags:
1070 * - SET :overwrite current alert data
1071 * - UPDATE:set alert data or return EBUSY if alert mode is already set
1072 *
1073 * - set the ALERT flag
1074 * - wakeup all sleeping threads
1075 */
1076 static int
port_alert(port_t * pp,int flags,int events,void * user)1077 port_alert(port_t *pp, int flags, int events, void *user)
1078 {
1079 port_queue_t *portq;
1080 portget_t *pgetp;
1081 port_alert_t *pa;
1082
1083 if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID)
1084 return (EINVAL);
1085
1086 portq = &pp->port_queue;
1087 pa = &portq->portq_alert;
1088 mutex_enter(&portq->portq_mutex);
1089
1090 /* check alert conditions */
1091 if (flags == PORT_ALERT_UPDATE) {
1092 if (portq->portq_flags & PORTQ_ALERT) {
1093 mutex_exit(&portq->portq_mutex);
1094 return (EBUSY);
1095 }
1096 }
1097
1098 /*
1099 * Store alert data in the port to be delivered to threads
1100 * which are using port_get(n) to retrieve events.
1101 */
1102
1103 portq->portq_flags |= PORTQ_ALERT;
1104 pa->portal_events = events; /* alert info */
1105 pa->portal_pid = curproc->p_pid; /* process owner */
1106 pa->portal_object = 0; /* no object */
1107 pa->portal_user = user; /* user alert data */
1108
1109 /* alert and deliver alert data to waiting threads */
1110 pgetp = portq->portq_thread;
1111 if (pgetp == NULL) {
1112 /* no threads waiting for events */
1113 mutex_exit(&portq->portq_mutex);
1114 return (0);
1115 }
1116
1117 /*
1118 * Set waiting threads in alert mode (PORTGET_ALERT)..
1119 * Every thread waiting for events already allocated a portget_t
1120 * structure to sleep on.
1121 * The port alert arguments are stored in the portget_t structure.
1122 * The PORTGET_ALERT flag is set to indicate the thread to return
1123 * immediately with the alert event.
1124 */
1125 do {
1126 if ((pgetp->portget_state & PORTGET_ALERT) == 0) {
1127 pa = &pgetp->portget_alert;
1128 pa->portal_events = events;
1129 pa->portal_object = 0;
1130 pa->portal_user = user;
1131 pgetp->portget_state |= PORTGET_ALERT;
1132 cv_signal(&pgetp->portget_cv);
1133 }
1134 } while ((pgetp = pgetp->portget_next) != portq->portq_thread);
1135 mutex_exit(&portq->portq_mutex);
1136 return (0);
1137 }
1138
1139 /*
1140 * Clear alert state of the port
1141 */
1142 static void
port_remove_alert(port_queue_t * portq)1143 port_remove_alert(port_queue_t *portq)
1144 {
1145 mutex_enter(&portq->portq_mutex);
1146 portq->portq_flags &= ~PORTQ_ALERT;
1147 mutex_exit(&portq->portq_mutex);
1148 }
1149
1150 /*
1151 * The port_getn() function is used to retrieve events from a port.
1152 *
1153 * The port_getn() function returns immediately if there are enough events
1154 * available in the port to satisfy the request or if the port is in alert
1155 * mode (see port_alert(3c)).
1156 * The timeout argument of port_getn(3c) -which is embedded in the
1157 * port_gettimer_t structure- specifies if the system call should block or if it
1158 * should return immediately depending on the number of events available.
1159 * This function is internally used by port_getn(3c) as well as by
1160 * port_get(3c).
1161 */
1162 static int
port_getn(port_t * pp,port_event_t * uevp,uint_t max,uint_t * nget,port_gettimer_t * pgt)1163 port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget,
1164 port_gettimer_t *pgt)
1165 {
1166 port_queue_t *portq;
1167 port_kevent_t *pev;
1168 port_kevent_t *lev;
1169 int error = 0;
1170 uint_t nmax;
1171 uint_t nevents;
1172 uint_t eventsz;
1173 port_event_t *kevp;
1174 list_t *glist;
1175 uint_t tnent;
1176 int rval;
1177 int blocking = -1;
1178 int timecheck;
1179 int flag;
1180 timespec_t rqtime;
1181 timespec_t *rqtp = NULL;
1182 portget_t *pgetp;
1183 void *results;
1184 model_t model = get_udatamodel();
1185
1186 flag = pgt->pgt_flags;
1187
1188 if (*nget > max && max > 0)
1189 return (EINVAL);
1190
1191 portq = &pp->port_queue;
1192 mutex_enter(&portq->portq_mutex);
1193 if (max == 0) {
1194 /*
1195 * Return number of objects with events.
1196 * The port_block() call is required to synchronize this
1197 * thread with another possible thread, which could be
1198 * retrieving events from the port queue.
1199 */
1200 port_block(portq);
1201 /*
1202 * Check if a second thread is currently retrieving events
1203 * and it is using the temporary event queue.
1204 */
1205 if (portq->portq_tnent) {
1206 /* put remaining events back to the port queue */
1207 port_push_eventq(portq);
1208 }
1209 *nget = portq->portq_nent;
1210 port_unblock(portq);
1211 mutex_exit(&portq->portq_mutex);
1212 return (0);
1213 }
1214
1215 if (uevp == NULL) {
1216 mutex_exit(&portq->portq_mutex);
1217 return (EFAULT);
1218 }
1219 if (*nget == 0) { /* no events required */
1220 mutex_exit(&portq->portq_mutex);
1221 return (0);
1222 }
1223
1224 /* port is being closed ... */
1225 if (portq->portq_flags & PORTQ_CLOSE) {
1226 mutex_exit(&portq->portq_mutex);
1227 return (EBADFD);
1228 }
1229
1230 /* return immediately if port in alert mode */
1231 if (portq->portq_flags & PORTQ_ALERT) {
1232 error = port_get_alert(&portq->portq_alert, uevp);
1233 if (error == 0)
1234 *nget = 1;
1235 mutex_exit(&portq->portq_mutex);
1236 return (error);
1237 }
1238
1239 portq->portq_thrcnt++;
1240
1241 /*
1242 * Now check if the completed events satisfy the
1243 * "wait" requirements of the current thread:
1244 */
1245
1246 if (pgt->pgt_loop) {
1247 /*
1248 * loop entry of same thread
1249 * pgt_loop is set when the current thread returns
1250 * prematurely from this function. That could happen
1251 * when a port is being shared between processes and
1252 * this thread could not find events to return.
1253 * It is not allowed to a thread to retrieve non-shareable
1254 * events generated in other processes.
1255 * PORTQ_WAIT_EVENTS is set when a thread already
1256 * checked the current event queue and no new events
1257 * are added to the queue.
1258 */
1259 if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) &&
1260 (portq->portq_nent >= *nget)) {
1261 /* some new events arrived ...check them */
1262 goto portnowait;
1263 }
1264 rqtp = pgt->pgt_rqtp;
1265 timecheck = pgt->pgt_timecheck;
1266 pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
1267 } else {
1268 /* check if enough events are available ... */
1269 if (portq->portq_nent >= *nget)
1270 goto portnowait;
1271 /*
1272 * There are not enough events available to satisfy
1273 * the request, check timeout value and wait for
1274 * incoming events.
1275 */
1276 error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp,
1277 &blocking, flag);
1278 if (error) {
1279 port_check_return_cond(portq);
1280 mutex_exit(&portq->portq_mutex);
1281 return (error);
1282 }
1283
1284 if (blocking == 0) /* don't block, check fired events */
1285 goto portnowait;
1286
1287 if (rqtp != NULL) {
1288 timespec_t now;
1289 timecheck = timechanged;
1290 gethrestime(&now);
1291 timespecadd(rqtp, &now);
1292 }
1293 }
1294
1295 /* enqueue thread in the list of waiting threads */
1296 pgetp = port_queue_thread(portq, *nget);
1297
1298
1299 /* Wait here until return conditions met */
1300 for (;;) {
1301 if (pgetp->portget_state & PORTGET_ALERT) {
1302 /* reap alert event and return */
1303 error = port_get_alert(&pgetp->portget_alert, uevp);
1304 if (error)
1305 *nget = 0;
1306 else
1307 *nget = 1;
1308 port_dequeue_thread(&pp->port_queue, pgetp);
1309 portq->portq_thrcnt--;
1310 mutex_exit(&portq->portq_mutex);
1311 return (error);
1312 }
1313
1314 /*
1315 * Check if some other thread is already retrieving
1316 * events (portq_getn > 0).
1317 */
1318
1319 if ((portq->portq_getn == 0) &&
1320 ((portq)->portq_nent >= *nget) &&
1321 (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) ||
1322 !((portq)->portq_flags & PORTQ_WAIT_EVENTS)))
1323 break;
1324
1325 if (portq->portq_flags & PORTQ_CLOSE) {
1326 error = EBADFD;
1327 break;
1328 }
1329
1330 rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
1331 rqtp, timecheck);
1332
1333 if (rval <= 0) {
1334 error = (rval == 0) ? EINTR : ETIME;
1335 break;
1336 }
1337 }
1338
1339 /* take thread out of the wait queue */
1340 port_dequeue_thread(portq, pgetp);
1341
1342 if (error != 0 && (error == EINTR || error == EBADFD ||
1343 (error == ETIME && flag))) {
1344 /* return without events */
1345 port_check_return_cond(portq);
1346 mutex_exit(&portq->portq_mutex);
1347 return (error);
1348 }
1349
1350 portnowait:
1351 /*
1352 * Move port event queue to a temporary event queue .
1353 * New incoming events will be continue be posted to the event queue
1354 * and they will not be considered by the current thread.
1355 * The idea is to avoid lock contentions or an often locking/unlocking
1356 * of the port queue mutex. The contention and performance degradation
1357 * could happen because:
1358 * a) incoming events use the port queue mutex to enqueue new events and
1359 * b) before the event can be delivered to the application it is
1360 * necessary to notify the event sources about the event delivery.
1361 * Sometimes the event sources can require a long time to return and
1362 * the queue mutex would block incoming events.
1363 * During this time incoming events (port_send_event()) do not need
1364 * to awake threads waiting for events. Before the current thread
1365 * returns it will check the conditions to awake other waiting threads.
1366 */
1367 portq->portq_getn++; /* number of threads retrieving events */
1368 port_block(portq); /* block other threads here */
1369 nmax = max < portq->portq_nent ? max : portq->portq_nent;
1370
1371 if (portq->portq_tnent) {
1372 /*
1373 * Move remaining events from previous thread back to the
1374 * port event queue.
1375 */
1376 port_push_eventq(portq);
1377 }
1378 /* move port event queue to a temporary queue */
1379 list_move_tail(&portq->portq_get_list, &portq->portq_list);
1380 glist = &portq->portq_get_list; /* use temporary event queue */
1381 tnent = portq->portq_nent; /* get current number of events */
1382 portq->portq_nent = 0; /* no events in the port event queue */
1383 portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */
1384 mutex_exit(&portq->portq_mutex); /* event queue can be reused now */
1385
1386 if (model == DATAMODEL_NATIVE) {
1387 eventsz = sizeof (port_event_t);
1388
1389 if (nmax == 0) {
1390 kevp = NULL;
1391 } else {
1392 kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
1393 if (kevp == NULL) {
1394 if (nmax > pp->port_max_list)
1395 nmax = pp->port_max_list;
1396 kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
1397 }
1398 }
1399
1400 results = kevp;
1401 lev = NULL; /* start with first event in the queue */
1402 for (nevents = 0; nevents < nmax; ) {
1403 pev = port_get_kevent(glist, lev);
1404 if (pev == NULL) /* no more events available */
1405 break;
1406 if (pev->portkev_flags & PORT_KEV_FREE) {
1407 /* Just discard event */
1408 list_remove(glist, pev);
1409 pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
1410 if (PORT_FREE_EVENT(pev))
1411 port_free_event_local(pev, 0);
1412 tnent--;
1413 continue;
1414 }
1415
1416 /* move event data to copyout list */
1417 if (port_copy_event(&kevp[nevents], pev, glist)) {
1418 /*
1419 * Event can not be delivered to the
1420 * current process.
1421 */
1422 if (lev != NULL)
1423 list_insert_after(glist, lev, pev);
1424 else
1425 list_insert_head(glist, pev);
1426 lev = pev; /* last checked event */
1427 } else {
1428 nevents++; /* # of events ready */
1429 }
1430 }
1431 #ifdef _SYSCALL32_IMPL
1432 } else {
1433 port_event32_t *kevp32;
1434
1435 eventsz = sizeof (port_event32_t);
1436
1437 if (nmax == 0) {
1438 kevp32 = NULL;
1439 } else {
1440 kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
1441 if (kevp32 == NULL) {
1442 if (nmax > pp->port_max_list)
1443 nmax = pp->port_max_list;
1444 kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
1445 }
1446 }
1447
1448 results = kevp32;
1449 lev = NULL; /* start with first event in the queue */
1450 for (nevents = 0; nevents < nmax; ) {
1451 pev = port_get_kevent(glist, lev);
1452 if (pev == NULL) /* no more events available */
1453 break;
1454 if (pev->portkev_flags & PORT_KEV_FREE) {
1455 /* Just discard event */
1456 list_remove(glist, pev);
1457 pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
1458 if (PORT_FREE_EVENT(pev))
1459 port_free_event_local(pev, 0);
1460 tnent--;
1461 continue;
1462 }
1463
1464 /* move event data to copyout list */
1465 if (port_copy_event32(&kevp32[nevents], pev, glist)) {
1466 /*
1467 * Event can not be delivered to the
1468 * current process.
1469 */
1470 if (lev != NULL)
1471 list_insert_after(glist, lev, pev);
1472 else
1473 list_insert_head(glist, pev);
1474 lev = pev; /* last checked event */
1475 } else {
1476 nevents++; /* # of events ready */
1477 }
1478 }
1479 #endif /* _SYSCALL32_IMPL */
1480 }
1481
1482 /*
1483 * Remember number of remaining events in the temporary event queue.
1484 */
1485 portq->portq_tnent = tnent - nevents;
1486
1487 /*
1488 * Work to do before return :
1489 * - push list of remaining events back to the top of the standard
1490 * port queue.
1491 * - if this is the last thread calling port_get(n) then wakeup the
1492 * thread waiting on close(2).
1493 * - check for a deferred cv_signal from port_send_event() and wakeup
1494 * the sleeping thread.
1495 */
1496
1497 mutex_enter(&portq->portq_mutex);
1498 port_unblock(portq);
1499 if (portq->portq_tnent) {
1500 /*
1501 * move remaining events in the temporary event queue back
1502 * to the port event queue
1503 */
1504 port_push_eventq(portq);
1505 }
1506 portq->portq_getn--; /* update # of threads retrieving events */
1507 if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */
1508 /* Last thread => check close(2) conditions ... */
1509 if (portq->portq_flags & PORTQ_CLOSE) {
1510 cv_signal(&portq->portq_closecv);
1511 mutex_exit(&portq->portq_mutex);
1512 kmem_free(results, eventsz * nmax);
1513 /* do not copyout events */
1514 *nget = 0;
1515 return (EBADFD);
1516 }
1517 } else if (portq->portq_getn == 0) {
1518 /*
1519 * no other threads retrieving events ...
1520 * check wakeup conditions of sleeping threads
1521 */
1522 if ((portq->portq_thread != NULL) &&
1523 (portq->portq_nent >= portq->portq_nget))
1524 cv_signal(&portq->portq_thread->portget_cv);
1525 }
1526
1527 /*
1528 * Check PORTQ_POLLIN here because the current thread set temporarily
1529 * the number of events in the queue to zero.
1530 */
1531 if (portq->portq_flags & PORTQ_POLLIN) {
1532 portq->portq_flags &= ~PORTQ_POLLIN;
1533 mutex_exit(&portq->portq_mutex);
1534 pollwakeup(&pp->port_pollhd, POLLIN);
1535 } else {
1536 mutex_exit(&portq->portq_mutex);
1537 }
1538
1539 /* now copyout list of user event structures to user space */
1540 if (nevents) {
1541 if (copyout(results, uevp, nevents * eventsz))
1542 error = EFAULT;
1543 }
1544 kmem_free(results, eventsz * nmax);
1545
1546 if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) {
1547 /* no events retrieved: check loop conditions */
1548 if (blocking == -1) {
1549 /* no timeout checked */
1550 error = port_get_timeout(pgt->pgt_timeout,
1551 &pgt->pgt_rqtime, &rqtp, &blocking, flag);
1552 if (error) {
1553 *nget = nevents;
1554 return (error);
1555 }
1556 if (rqtp != NULL) {
1557 timespec_t now;
1558 pgt->pgt_timecheck = timechanged;
1559 gethrestime(&now);
1560 timespecadd(&pgt->pgt_rqtime, &now);
1561 }
1562 pgt->pgt_rqtp = rqtp;
1563 } else {
1564 /* timeout already checked -> remember values */
1565 pgt->pgt_rqtp = rqtp;
1566 if (rqtp != NULL) {
1567 pgt->pgt_timecheck = timecheck;
1568 pgt->pgt_rqtime = *rqtp;
1569 }
1570 }
1571 if (blocking)
1572 /* timeout remaining */
1573 pgt->pgt_loop = 1;
1574 }
1575
1576 /* set number of user event structures completed */
1577 *nget = nevents;
1578 return (error);
1579 }
1580
1581 /*
1582 * 1. copy kernel event structure to user event structure.
1583 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
1584 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
1585 * 4. Other types of event structures can be delivered back to the port cache
1586 * (port_free_event_local()).
1587 * 5. The event source callback function is the last opportunity for the
1588 * event source to update events, to free local resources associated with
1589 * the event or to deny the delivery of the event.
1590 */
1591 static int
port_copy_event(port_event_t * puevp,port_kevent_t * pkevp,list_t * list)1592 port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list)
1593 {
1594 int free_event = 0;
1595 int flags;
1596 int error;
1597
1598 puevp->portev_source = pkevp->portkev_source;
1599 puevp->portev_object = pkevp->portkev_object;
1600 puevp->portev_user = pkevp->portkev_user;
1601 puevp->portev_events = pkevp->portkev_events;
1602
1603 /* remove event from the queue */
1604 list_remove(list, pkevp);
1605
1606 /*
1607 * Events of type PORT_KEV_WIRED remain allocated by the
1608 * event source.
1609 */
1610 flags = pkevp->portkev_flags;
1611 if (pkevp->portkev_flags & PORT_KEV_WIRED)
1612 pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
1613 else
1614 free_event = 1;
1615
1616 if (pkevp->portkev_callback) {
1617 error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
1618 &puevp->portev_events, pkevp->portkev_pid,
1619 PORT_CALLBACK_DEFAULT, pkevp);
1620
1621 if (error) {
1622 /*
1623 * Event can not be delivered.
1624 * Caller must reinsert the event into the queue.
1625 */
1626 pkevp->portkev_flags = flags;
1627 return (error);
1628 }
1629 }
1630 if (free_event)
1631 port_free_event_local(pkevp, 0);
1632 return (0);
1633 }
1634
1635 #ifdef _SYSCALL32_IMPL
1636 /*
1637 * 1. copy kernel event structure to user event structure.
1638 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
1639 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
1640 * 4. Other types of event structures can be delivered back to the port cache
1641 * (port_free_event_local()).
1642 * 5. The event source callback function is the last opportunity for the
1643 * event source to update events, to free local resources associated with
1644 * the event or to deny the delivery of the event.
1645 */
1646 static int
port_copy_event32(port_event32_t * puevp,port_kevent_t * pkevp,list_t * list)1647 port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list)
1648 {
1649 int free_event = 0;
1650 int error;
1651 int flags;
1652
1653 puevp->portev_source = pkevp->portkev_source;
1654 puevp->portev_object = (daddr32_t)pkevp->portkev_object;
1655 puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user;
1656 puevp->portev_events = pkevp->portkev_events;
1657
1658 /* remove event from the queue */
1659 list_remove(list, pkevp);
1660
1661 /*
1662 * Events if type PORT_KEV_WIRED remain allocated by the
1663 * sub-system (source).
1664 */
1665
1666 flags = pkevp->portkev_flags;
1667 if (pkevp->portkev_flags & PORT_KEV_WIRED)
1668 pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
1669 else
1670 free_event = 1;
1671
1672 if (pkevp->portkev_callback != NULL) {
1673 error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
1674 &puevp->portev_events, pkevp->portkev_pid,
1675 PORT_CALLBACK_DEFAULT, pkevp);
1676 if (error) {
1677 /*
1678 * Event can not be delivered.
1679 * Caller must reinsert the event into the queue.
1680 */
1681 pkevp->portkev_flags = flags;
1682 return (error);
1683 }
1684 }
1685 if (free_event)
1686 port_free_event_local(pkevp, 0);
1687 return (0);
1688 }
1689 #endif /* _SYSCALL32_IMPL */
1690
1691 /*
1692 * copyout alert event.
1693 */
1694 static int
port_get_alert(port_alert_t * pa,port_event_t * uevp)1695 port_get_alert(port_alert_t *pa, port_event_t *uevp)
1696 {
1697 model_t model = get_udatamodel();
1698
1699 /* copyout alert event structures to user space */
1700 if (model == DATAMODEL_NATIVE) {
1701 port_event_t uev;
1702 uev.portev_source = PORT_SOURCE_ALERT;
1703 uev.portev_object = pa->portal_object;
1704 uev.portev_events = pa->portal_events;
1705 uev.portev_user = pa->portal_user;
1706 if (copyout(&uev, uevp, sizeof (port_event_t)))
1707 return (EFAULT);
1708 #ifdef _SYSCALL32_IMPL
1709 } else {
1710 port_event32_t uev32;
1711 uev32.portev_source = PORT_SOURCE_ALERT;
1712 uev32.portev_object = (daddr32_t)pa->portal_object;
1713 uev32.portev_events = pa->portal_events;
1714 uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user;
1715 if (copyout(&uev32, uevp, sizeof (port_event32_t)))
1716 return (EFAULT);
1717 #endif /* _SYSCALL32_IMPL */
1718 }
1719 return (0);
1720 }
1721
1722 /*
1723 * Check return conditions :
1724 * - pending port close(2)
1725 * - threads waiting for events
1726 */
1727 static void
port_check_return_cond(port_queue_t * portq)1728 port_check_return_cond(port_queue_t *portq)
1729 {
1730 ASSERT(MUTEX_HELD(&portq->portq_mutex));
1731 portq->portq_thrcnt--;
1732 if (portq->portq_flags & PORTQ_CLOSE) {
1733 if (portq->portq_thrcnt == 0)
1734 cv_signal(&portq->portq_closecv);
1735 else
1736 cv_signal(&portq->portq_thread->portget_cv);
1737 }
1738 }
1739
1740 /*
1741 * The port_get_kevent() function returns
1742 * - the event located at the head of the queue if 'last' pointer is NULL
1743 * - the next event after the event pointed by 'last'
1744 * The caller of this function is responsible for the integrity of the queue
1745 * in use:
1746 * - port_getn() is using a temporary queue protected with port_block().
1747 * - port_close_events() is working on the global event queue and protects
1748 * the queue with portq->portq_mutex.
1749 */
1750 port_kevent_t *
port_get_kevent(list_t * list,port_kevent_t * last)1751 port_get_kevent(list_t *list, port_kevent_t *last)
1752 {
1753 if (last == NULL)
1754 return (list_head(list));
1755 else
1756 return (list_next(list, last));
1757 }
1758
1759 /*
1760 * The port_get_timeout() function gets the timeout data from user space
1761 * and converts that info into a corresponding internal representation.
1762 * The kerneldata flag means that the timeout data is already loaded.
1763 */
1764 static int
port_get_timeout(timespec_t * timeout,timespec_t * rqtime,timespec_t ** rqtp,int * blocking,int kerneldata)1765 port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp,
1766 int *blocking, int kerneldata)
1767 {
1768 model_t model = get_udatamodel();
1769
1770 *rqtp = NULL;
1771 if (timeout == NULL) {
1772 *blocking = 1;
1773 return (0);
1774 }
1775
1776 if (kerneldata) {
1777 *rqtime = *timeout;
1778 } else {
1779 if (model == DATAMODEL_NATIVE) {
1780 if (copyin(timeout, rqtime, sizeof (*rqtime)))
1781 return (EFAULT);
1782 #ifdef _SYSCALL32_IMPL
1783 } else {
1784 timespec32_t wait_time_32;
1785 if (copyin(timeout, &wait_time_32,
1786 sizeof (wait_time_32)))
1787 return (EFAULT);
1788 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
1789 #endif /* _SYSCALL32_IMPL */
1790 }
1791 }
1792
1793 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
1794 *blocking = 0;
1795 return (0);
1796 }
1797
1798 if (rqtime->tv_sec < 0 ||
1799 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
1800 return (EINVAL);
1801
1802 *rqtp = rqtime;
1803 *blocking = 1;
1804 return (0);
1805 }
1806
1807 /*
1808 * port_queue_thread()
1809 * Threads requiring more events than available will be put in a wait queue.
1810 * There is a "thread wait queue" per port.
1811 * Threads requiring less events get a higher priority than others and they
1812 * will be awoken first.
1813 */
1814 static portget_t *
port_queue_thread(port_queue_t * portq,uint_t nget)1815 port_queue_thread(port_queue_t *portq, uint_t nget)
1816 {
1817 portget_t *pgetp;
1818 portget_t *ttp;
1819 portget_t *htp;
1820
1821 pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP);
1822 pgetp->portget_nget = nget;
1823 pgetp->portget_pid = curproc->p_pid;
1824 if (portq->portq_thread == NULL) {
1825 /* first waiting thread */
1826 portq->portq_thread = pgetp;
1827 portq->portq_nget = nget;
1828 pgetp->portget_prev = pgetp;
1829 pgetp->portget_next = pgetp;
1830 return (pgetp);
1831 }
1832
1833 /*
1834 * thread waiting for less events will be set on top of the queue.
1835 */
1836 ttp = portq->portq_thread;
1837 htp = ttp;
1838 for (;;) {
1839 if (nget <= ttp->portget_nget)
1840 break;
1841 if (htp == ttp->portget_next)
1842 break; /* last event */
1843 ttp = ttp->portget_next;
1844 }
1845
1846 /* add thread to the queue */
1847 pgetp->portget_next = ttp;
1848 pgetp->portget_prev = ttp->portget_prev;
1849 ttp->portget_prev->portget_next = pgetp;
1850 ttp->portget_prev = pgetp;
1851 if (portq->portq_thread == ttp)
1852 portq->portq_thread = pgetp;
1853 portq->portq_nget = portq->portq_thread->portget_nget;
1854 return (pgetp);
1855 }
1856
1857 /*
1858 * Take thread out of the queue.
1859 */
1860 static void
port_dequeue_thread(port_queue_t * portq,portget_t * pgetp)1861 port_dequeue_thread(port_queue_t *portq, portget_t *pgetp)
1862 {
1863 if (pgetp->portget_next == pgetp) {
1864 /* last (single) waiting thread */
1865 portq->portq_thread = NULL;
1866 portq->portq_nget = 0;
1867 } else {
1868 pgetp->portget_prev->portget_next = pgetp->portget_next;
1869 pgetp->portget_next->portget_prev = pgetp->portget_prev;
1870 if (portq->portq_thread == pgetp)
1871 portq->portq_thread = pgetp->portget_next;
1872 portq->portq_nget = portq->portq_thread->portget_nget;
1873 }
1874 kmem_free(pgetp, sizeof (portget_t));
1875 }
1876
1877 /*
1878 * Set up event port kstats.
1879 */
1880 static void
port_kstat_init()1881 port_kstat_init()
1882 {
1883 kstat_t *ksp;
1884 uint_t ndata;
1885
1886 ndata = sizeof (port_kstat) / sizeof (kstat_named_t);
1887 ksp = kstat_create("portfs", 0, "Event Ports", "misc",
1888 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL);
1889 if (ksp) {
1890 ksp->ks_data = &port_kstat;
1891 kstat_install(ksp);
1892 }
1893 }
1894