xref: /illumos-gate/usr/src/lib/libc/port/gen/select.c (revision 338d6fc1b322c01b220f204edde962e843478a78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * Emulation of select() system call using poll() system call.
32  *
33  * Assumptions:
34  *	polling for input only is most common.
35  *	polling for exceptional conditions is very rare.
36  *
37  * Note that it is not feasible to emulate all error conditions,
38  * in particular conditions that would return EFAULT are far too
39  * difficult to check for in a library routine.
40  */
41 
42 #pragma weak _select = select
43 
44 #include "lint.h"
45 #include <values.h>
46 #include <pthread.h>
47 #include <errno.h>
48 #include <stdlib.h>
49 #include <sys/time.h>
50 #include <sys/types.h>
51 #include <sys/select.h>
52 #include <sys/poll.h>
53 #include <alloca.h>
54 #include "libc.h"
55 
56 /*
57  * STACK_PFD_LIM
58  *
59  *   The limit at which pselect allocates pollfd structures in the heap,
60  *   rather than on the stack.  These limits match the historical behaviour
61  *   with the *_large_fdset implementations.
62  *
63  * BULK_ALLOC_LIM
64  *
65  *   The limit below which we'll just allocate nfds pollfds, rather than
66  *   counting how many we actually need.
67  */
/*
 * Allocation thresholds used by pselect(); the LP64 build gets the
 * larger pair of values.
 */
#if !defined(_LP64)
#define	STACK_PFD_LIM	1024
#define	BULK_ALLOC_LIM	1024
#else	/* _LP64 */
#define	STACK_PFD_LIM	FD_SETSIZE
#define	BULK_ALLOC_LIM	8192
#endif	/* !_LP64 */
75 
76 /*
77  * The previous _large_fdset implementations are, unfortunately, baked into
78  * the ABI.
79  */
80 #pragma weak select_large_fdset = select
81 #pragma weak pselect_large_fdset = pselect
82 
83 #define	fd_set_size(nfds)	(((nfds) + (NFDBITS - 1)) / NFDBITS)
84 
85 static nfds_t
86 fd_sets_count(int limit, fd_set *in, fd_set *out, fd_set *ex)
87 {
88 	nfds_t total = 0;
89 
90 	if (limit <= 0)
91 		return (0);
92 
93 	for (int i = 0; i < fd_set_size(limit); i++) {
94 		long v = (in->fds_bits[i] | out->fds_bits[i] | ex->fds_bits[i]);
95 
96 		while (v != 0) {
97 			v &= v - 1;
98 			total++;
99 		}
100 	}
101 
102 	return (total);
103 }
104 
105 int
106 pselect(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
107     const timespec_t *tsp, const sigset_t *sigmask)
108 {
109 	long *in, *out, *ex;
110 	ulong_t m;	/* bit mask */
111 	int j;		/* loop counter */
112 	ulong_t b;	/* bits to test */
113 	int n, rv;
114 	struct pollfd *pfd;
115 	struct pollfd *p;
116 	int lastj = -1;
117 	nfds_t npfds = 0;
118 	boolean_t heap_pfds = B_FALSE;
119 
120 	/* "zero" is read-only, it could go in the text segment */
121 	static fd_set zero = { 0 };
122 
123 	/*
124 	 * Check for invalid conditions at outset.
125 	 * Required for spec1170.
126 	 * SUSV3: We must behave as a cancellation point even if we fail early.
127 	 */
128 	if (nfds < 0 || nfds > FD_SETSIZE) {
129 		pthread_testcancel();
130 		errno = EINVAL;
131 		return (-1);
132 	}
133 
134 	if (tsp != NULL) {
135 		/* check timespec validity */
136 		if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
137 		    tsp->tv_sec < 0) {
138 			pthread_testcancel();
139 			errno = EINVAL;
140 			return (-1);
141 		}
142 	}
143 
144 	/*
145 	 * If any input args are null, point them at the null array.
146 	 */
147 	if (in0 == NULL)
148 		in0 = &zero;
149 	if (out0 == NULL)
150 		out0 = &zero;
151 	if (ex0 == NULL)
152 		ex0 = &zero;
153 
154 	if (nfds <= BULK_ALLOC_LIM) {
155 		p = pfd = alloca(nfds * sizeof (struct pollfd));
156 	} else {
157 		npfds = fd_sets_count(nfds, in0, out0, ex0);
158 
159 		if (npfds > STACK_PFD_LIM) {
160 			p = pfd = malloc(npfds * sizeof (struct pollfd));
161 			if (p == NULL)
162 				return (-1);
163 			heap_pfds = B_TRUE;
164 		} else {
165 			p = pfd = alloca(npfds * sizeof (struct pollfd));
166 		}
167 	}
168 
169 	/*
170 	 * For each fd, if any bits are set convert them into
171 	 * the appropriate pollfd struct.
172 	 */
173 	in = (long *)in0->fds_bits;
174 	out = (long *)out0->fds_bits;
175 	ex = (long *)ex0->fds_bits;
176 	for (n = 0; n < nfds; n += NFDBITS) {
177 		b = (ulong_t)(*in | *out | *ex);
178 		for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
179 			if (b & 1) {
180 				p->fd = n + j;
181 				if (p->fd >= nfds)
182 					goto done;
183 				p->events = 0;
184 				if (*in & m)
185 					p->events |= POLLRDNORM;
186 				if (*out & m)
187 					p->events |= POLLWRNORM;
188 				if (*ex & m)
189 					p->events |= POLLRDBAND;
190 				p++;
191 			}
192 		}
193 		in++;
194 		out++;
195 		ex++;
196 	}
197 done:
198 	/*
199 	 * Now do the poll.
200 	 */
201 	npfds = (int)(p - pfd);
202 	do {
203 		rv = _pollsys(pfd, npfds, tsp, sigmask);
204 	} while (rv < 0 && errno == EAGAIN);
205 
206 	if (rv < 0)		/* no need to set bit masks */
207 		goto out;
208 
209 	if (rv == 0) {
210 		/*
211 		 * Clear out bit masks, just in case.
212 		 * On the assumption that usually only
213 		 * one bit mask is set, use three loops.
214 		 */
215 		if (in0 != &zero) {
216 			in = (long *)in0->fds_bits;
217 			for (n = 0; n < nfds; n += NFDBITS)
218 				*in++ = 0;
219 		}
220 		if (out0 != &zero) {
221 			out = (long *)out0->fds_bits;
222 			for (n = 0; n < nfds; n += NFDBITS)
223 				*out++ = 0;
224 		}
225 		if (ex0 != &zero) {
226 			ex = (long *)ex0->fds_bits;
227 			for (n = 0; n < nfds; n += NFDBITS)
228 				*ex++ = 0;
229 		}
230 		rv = 0;
231 		goto out;
232 	}
233 
234 	/*
235 	 * Check for EINVAL error case first to avoid changing any bits
236 	 * if we're going to return an error.
237 	 */
238 	for (p = pfd, n = npfds; n-- > 0; p++) {
239 		/*
240 		 * select will return EBADF immediately if any fd's
241 		 * are bad.  poll will complete the poll on the
242 		 * rest of the fd's and include the error indication
243 		 * in the returned bits.  This is a rare case so we
244 		 * accept this difference and return the error after
245 		 * doing more work than select would've done.
246 		 */
247 		if (p->revents & POLLNVAL) {
248 			errno = EBADF;
249 			rv = -1;
250 			goto out;
251 		}
252 		/*
253 		 * We would like to make POLLHUP available to select,
254 		 * checking to see if we have pending data to be read.
255 		 * BUT until we figure out how not to break Xsun's
256 		 * dependencies on select's existing features...
257 		 * This is what we _thought_ would work ... sigh!
258 		 */
259 		/*
260 		 * if ((p->revents & POLLHUP) &&
261 		 *	!(p->revents & (POLLRDNORM|POLLRDBAND))) {
262 		 *	errno = EINTR;
263 		 *	rv = -1;
264 		 *	goto out;
265 		 * }
266 		 */
267 	}
268 
269 	/*
270 	 * Convert results of poll back into bits
271 	 * in the argument arrays.
272 	 *
273 	 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
274 	 * on return from poll if they were set on input, thus we don't
275 	 * worry about accidentally setting the corresponding bits in the
276 	 * zero array if the input bit masks were null.
277 	 *
278 	 * Must return number of bits set, not number of ready descriptors
279 	 * (as the man page says, and as poll() does).
280 	 */
281 	rv = 0;
282 	for (p = pfd, n = npfds; n-- > 0; p++) {
283 		j = (int)(p->fd / NFDBITS);
284 		/* have we moved into another word of the bit mask yet? */
285 		if (j != lastj) {
286 			/* clear all output bits to start with */
287 			in = (long *)&in0->fds_bits[j];
288 			out = (long *)&out0->fds_bits[j];
289 			ex = (long *)&ex0->fds_bits[j];
290 			/*
291 			 * In case we made "zero" read-only (e.g., with
292 			 * cc -R), avoid actually storing into it.
293 			 */
294 			if (in0 != &zero)
295 				*in = 0;
296 			if (out0 != &zero)
297 				*out = 0;
298 			if (ex0 != &zero)
299 				*ex = 0;
300 			lastj = j;
301 		}
302 		if (p->revents) {
303 			m = 1L << (p->fd % NFDBITS);
304 			if (p->revents & POLLRDNORM) {
305 				*in |= m;
306 				rv++;
307 			}
308 			if (p->revents & POLLWRNORM) {
309 				*out |= m;
310 				rv++;
311 			}
312 			if (p->revents & POLLRDBAND) {
313 				*ex |= m;
314 				rv++;
315 			}
316 			/*
317 			 * Only set this bit on return if we asked about
318 			 * input conditions.
319 			 */
320 			if ((p->revents & (POLLHUP|POLLERR)) &&
321 			    (p->events & POLLRDNORM)) {
322 				if ((*in & m) == 0)
323 					rv++;	/* wasn't already set */
324 				*in |= m;
325 			}
326 			/*
327 			 * Only set this bit on return if we asked about
328 			 * output conditions.
329 			 */
330 			if ((p->revents & (POLLHUP|POLLERR)) &&
331 			    (p->events & POLLWRNORM)) {
332 				if ((*out & m) == 0)
333 					rv++;	/* wasn't already set */
334 				*out |= m;
335 			}
336 			/*
337 			 * Only set this bit on return if we asked about
338 			 * output conditions.
339 			 */
340 			if ((p->revents & (POLLHUP|POLLERR)) &&
341 			    (p->events & POLLRDBAND)) {
342 				if ((*ex & m) == 0)
343 					rv++;	/* wasn't already set */
344 				*ex |= m;
345 			}
346 		}
347 	}
348 out:
349 	if (heap_pfds)
350 		free(pfd);
351 	return (rv);
352 }
353 
354 int
355 select(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, struct timeval *tv)
356 {
357 	timespec_t ts;
358 	timespec_t *tsp;
359 
360 	if (tv == NULL)
361 		tsp = NULL;
362 	else {
363 		/* check timeval validity */
364 		if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
365 			errno = EINVAL;
366 			return (-1);
367 		}
368 		/*
369 		 * Convert timeval to timespec.
370 		 * To preserve compatibility with past behavior,
371 		 * when select was built upon poll(2), which has a
372 		 * minimum non-zero timeout of 1 millisecond, force
373 		 * a minimum non-zero timeout of 500 microseconds.
374 		 */
375 		ts.tv_sec = tv->tv_sec;
376 		ts.tv_nsec = tv->tv_usec * 1000;
377 		if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
378 			ts.tv_nsec = 500000;
379 		tsp = &ts;
380 	}
381 
382 	return (pselect(nfds, in0, out0, ex0, tsp, NULL));
383 }
384