1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1988 AT&T */
28 /* All Rights Reserved */
29
30 /*
31 * Emulation of select() system call using poll() system call.
32 *
33 * Assumptions:
34 * polling for input only is most common.
35 * polling for exceptional conditions is very rare.
36 *
37 * Note that it is not feasible to emulate all error conditions,
38 * in particular conditions that would return EFAULT are far too
39 * difficult to check for in a library routine.
40 */
41
42 #pragma weak _select = select
43
44 #include "lint.h"
45 #include <values.h>
46 #include <pthread.h>
47 #include <errno.h>
48 #include <stdlib.h>
49 #include <sys/time.h>
50 #include <sys/types.h>
51 #include <sys/select.h>
52 #include <sys/poll.h>
53 #include <alloca.h>
54 #include "libc.h"
55
56 /*
57 * STACK_PFD_LIM
58 *
59 * The limit at which pselect allocates pollfd structures in the heap,
60 * rather than on the stack. These limits match the historical behaviour
61 * with the *_large_fdset implementations.
62 *
63 * BULK_ALLOC_LIM
64 *
65 * The limit below which we'll just allocate nfds pollfds, rather than
66 * counting how many we actually need.
67 */
68 #if defined(_LP64)
69 #define STACK_PFD_LIM FD_SETSIZE
70 #define BULK_ALLOC_LIM 8192
71 #else
72 #define STACK_PFD_LIM 1024
73 #define BULK_ALLOC_LIM 1024
74 #endif
75
76 /*
77 * The previous _large_fdset implementations are, unfortunately, baked into
78 * the ABI.
79 */
80 #pragma weak select_large_fdset = select
81 #pragma weak pselect_large_fdset = pselect
82
83 #define fd_set_size(nfds) (((nfds) + (NFDBITS - 1)) / NFDBITS)
84
85 static nfds_t
fd_sets_count(int limit,fd_set * in,fd_set * out,fd_set * ex)86 fd_sets_count(int limit, fd_set *in, fd_set *out, fd_set *ex)
87 {
88 nfds_t total = 0;
89
90 if (limit <= 0)
91 return (0);
92
93 for (int i = 0; i < fd_set_size(limit); i++) {
94 long v = (in->fds_bits[i] | out->fds_bits[i] | ex->fds_bits[i]);
95
96 while (v != 0) {
97 v &= v - 1;
98 total++;
99 }
100 }
101
102 return (total);
103 }
104
105 int
pselect(int nfds,fd_set * in0,fd_set * out0,fd_set * ex0,const timespec_t * tsp,const sigset_t * sigmask)106 pselect(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
107 const timespec_t *tsp, const sigset_t *sigmask)
108 {
109 long *in, *out, *ex;
110 ulong_t m; /* bit mask */
111 int j; /* loop counter */
112 ulong_t b; /* bits to test */
113 int n, rv;
114 struct pollfd *pfd;
115 struct pollfd *p;
116 int lastj = -1;
117 nfds_t npfds = 0;
118 boolean_t heap_pfds = B_FALSE;
119
120 /* "zero" is read-only, it could go in the text segment */
121 static fd_set zero = { 0 };
122
123 /*
124 * Check for invalid conditions at outset.
125 * Required for spec1170.
126 * SUSV3: We must behave as a cancellation point even if we fail early.
127 */
128 if (nfds < 0 || nfds > FD_SETSIZE) {
129 pthread_testcancel();
130 errno = EINVAL;
131 return (-1);
132 }
133
134 if (tsp != NULL) {
135 /* check timespec validity */
136 if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
137 tsp->tv_sec < 0) {
138 pthread_testcancel();
139 errno = EINVAL;
140 return (-1);
141 }
142 }
143
144 /*
145 * If any input args are null, point them at the null array.
146 */
147 if (in0 == NULL)
148 in0 = &zero;
149 if (out0 == NULL)
150 out0 = &zero;
151 if (ex0 == NULL)
152 ex0 = &zero;
153
154 if (nfds <= BULK_ALLOC_LIM) {
155 p = pfd = alloca(nfds * sizeof (struct pollfd));
156 } else {
157 npfds = fd_sets_count(nfds, in0, out0, ex0);
158
159 if (npfds > STACK_PFD_LIM) {
160 p = pfd = malloc(npfds * sizeof (struct pollfd));
161 if (p == NULL)
162 return (-1);
163 heap_pfds = B_TRUE;
164 } else {
165 p = pfd = alloca(npfds * sizeof (struct pollfd));
166 }
167 }
168
169 /*
170 * For each fd, if any bits are set convert them into
171 * the appropriate pollfd struct.
172 */
173 in = (long *)in0->fds_bits;
174 out = (long *)out0->fds_bits;
175 ex = (long *)ex0->fds_bits;
176 for (n = 0; n < nfds; n += NFDBITS) {
177 b = (ulong_t)(*in | *out | *ex);
178 for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
179 if (b & 1) {
180 p->fd = n + j;
181 if (p->fd >= nfds)
182 goto done;
183 p->events = 0;
184 if (*in & m)
185 p->events |= POLLRDNORM;
186 if (*out & m)
187 p->events |= POLLWRNORM;
188 if (*ex & m)
189 p->events |= POLLRDBAND;
190 p++;
191 }
192 }
193 in++;
194 out++;
195 ex++;
196 }
197 done:
198 /*
199 * Now do the poll.
200 */
201 npfds = (int)(p - pfd);
202 do {
203 rv = _pollsys(pfd, npfds, tsp, sigmask);
204 } while (rv < 0 && errno == EAGAIN);
205
206 if (rv < 0) /* no need to set bit masks */
207 goto out;
208
209 if (rv == 0) {
210 /*
211 * Clear out bit masks, just in case.
212 * On the assumption that usually only
213 * one bit mask is set, use three loops.
214 */
215 if (in0 != &zero) {
216 in = (long *)in0->fds_bits;
217 for (n = 0; n < nfds; n += NFDBITS)
218 *in++ = 0;
219 }
220 if (out0 != &zero) {
221 out = (long *)out0->fds_bits;
222 for (n = 0; n < nfds; n += NFDBITS)
223 *out++ = 0;
224 }
225 if (ex0 != &zero) {
226 ex = (long *)ex0->fds_bits;
227 for (n = 0; n < nfds; n += NFDBITS)
228 *ex++ = 0;
229 }
230 rv = 0;
231 goto out;
232 }
233
234 /*
235 * Check for EINVAL error case first to avoid changing any bits
236 * if we're going to return an error.
237 */
238 for (p = pfd, n = npfds; n-- > 0; p++) {
239 /*
240 * select will return EBADF immediately if any fd's
241 * are bad. poll will complete the poll on the
242 * rest of the fd's and include the error indication
243 * in the returned bits. This is a rare case so we
244 * accept this difference and return the error after
245 * doing more work than select would've done.
246 */
247 if (p->revents & POLLNVAL) {
248 errno = EBADF;
249 rv = -1;
250 goto out;
251 }
252 /*
253 * We would like to make POLLHUP available to select,
254 * checking to see if we have pending data to be read.
255 * BUT until we figure out how not to break Xsun's
256 * dependencies on select's existing features...
257 * This is what we _thought_ would work ... sigh!
258 */
259 /*
260 * if ((p->revents & POLLHUP) &&
261 * !(p->revents & (POLLRDNORM|POLLRDBAND))) {
262 * errno = EINTR;
263 * rv = -1;
264 * goto out;
265 * }
266 */
267 }
268
269 /*
270 * Convert results of poll back into bits
271 * in the argument arrays.
272 *
273 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
274 * on return from poll if they were set on input, thus we don't
275 * worry about accidentally setting the corresponding bits in the
276 * zero array if the input bit masks were null.
277 *
278 * Must return number of bits set, not number of ready descriptors
279 * (as the man page says, and as poll() does).
280 */
281 rv = 0;
282 for (p = pfd, n = npfds; n-- > 0; p++) {
283 j = (int)(p->fd / NFDBITS);
284 /* have we moved into another word of the bit mask yet? */
285 if (j != lastj) {
286 /* clear all output bits to start with */
287 in = (long *)&in0->fds_bits[j];
288 out = (long *)&out0->fds_bits[j];
289 ex = (long *)&ex0->fds_bits[j];
290 /*
291 * In case we made "zero" read-only (e.g., with
292 * cc -R), avoid actually storing into it.
293 */
294 if (in0 != &zero)
295 *in = 0;
296 if (out0 != &zero)
297 *out = 0;
298 if (ex0 != &zero)
299 *ex = 0;
300 lastj = j;
301 }
302 if (p->revents) {
303 m = 1L << (p->fd % NFDBITS);
304 if (p->revents & POLLRDNORM) {
305 *in |= m;
306 rv++;
307 }
308 if (p->revents & POLLWRNORM) {
309 *out |= m;
310 rv++;
311 }
312 if (p->revents & POLLRDBAND) {
313 *ex |= m;
314 rv++;
315 }
316 /*
317 * Only set this bit on return if we asked about
318 * input conditions.
319 */
320 if ((p->revents & (POLLHUP|POLLERR)) &&
321 (p->events & POLLRDNORM)) {
322 if ((*in & m) == 0)
323 rv++; /* wasn't already set */
324 *in |= m;
325 }
326 /*
327 * Only set this bit on return if we asked about
328 * output conditions.
329 */
330 if ((p->revents & (POLLHUP|POLLERR)) &&
331 (p->events & POLLWRNORM)) {
332 if ((*out & m) == 0)
333 rv++; /* wasn't already set */
334 *out |= m;
335 }
336 /*
337 * Only set this bit on return if we asked about
338 * output conditions.
339 */
340 if ((p->revents & (POLLHUP|POLLERR)) &&
341 (p->events & POLLRDBAND)) {
342 if ((*ex & m) == 0)
343 rv++; /* wasn't already set */
344 *ex |= m;
345 }
346 }
347 }
348 out:
349 if (heap_pfds)
350 free(pfd);
351 return (rv);
352 }
353
354 int
select(int nfds,fd_set * in0,fd_set * out0,fd_set * ex0,struct timeval * tv)355 select(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, struct timeval *tv)
356 {
357 timespec_t ts;
358 timespec_t *tsp;
359
360 if (tv == NULL)
361 tsp = NULL;
362 else {
363 /* check timeval validity */
364 if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
365 errno = EINVAL;
366 return (-1);
367 }
368 /*
369 * Convert timeval to timespec.
370 * To preserve compatibility with past behavior,
371 * when select was built upon poll(2), which has a
372 * minimum non-zero timeout of 1 millisecond, force
373 * a minimum non-zero timeout of 500 microseconds.
374 */
375 ts.tv_sec = tv->tv_sec;
376 ts.tv_nsec = tv->tv_usec * 1000;
377 if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
378 ts.tv_nsec = 500000;
379 tsp = &ts;
380 }
381
382 return (pselect(nfds, in0, out0, ex0, tsp, NULL));
383 }
384