1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1988 AT&T */
28 /* All Rights Reserved */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 /*
33 * Emulation of select() system call using _pollsys() system call.
34 *
35 * Assumptions:
36 * polling for input only is most common.
37 * polling for exceptional conditions is very rare.
38 *
 * Note that it is not feasible to emulate all error conditions,
40 * in particular conditions that would return EFAULT are far too
41 * difficult to check for in a library routine.
42 *
43 * This is the alternate large fd_set select.
44 *
45 */
46
47 /*
48 * Must precede any include files
49 */
50 #ifdef FD_SETSIZE
51 #undef FD_SETSIZE
52 #endif
53 #define FD_SETSIZE 65536
54
55 #include "lint.h"
56 #include <values.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #include <pthread.h>
60 #include <errno.h>
61 #include <sys/time.h>
62 #include <sys/types.h>
63 #include <sys/poll.h>
64 #include <string.h>
65 #include <stdlib.h>
66 #include "libc.h"
67
68 #define DEFAULT_POLL_SIZE 64
69
70 static struct pollfd *realloc_fds(int *, struct pollfd **, struct pollfd *);
71
72 int
pselect_large_fdset(int nfds,fd_set * in0,fd_set * out0,fd_set * ex0,const timespec_t * tsp,const sigset_t * sigmask)73 pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
74 const timespec_t *tsp, const sigset_t *sigmask)
75 {
76 long *in, *out, *ex;
77 ulong_t m; /* bit mask */
78 int j; /* loop counter */
79 ulong_t b; /* bits to test */
80 int n, rv;
81 int lastj = -1;
82 int nused;
83
84 /*
85 * Rather than have a mammoth pollfd (65K) list on the stack
86 * we start with a small one and then malloc larger chunks
87 * on the heap if necessary.
88 */
89
90 struct pollfd pfd[DEFAULT_POLL_SIZE];
91 struct pollfd *p;
92 struct pollfd *pfd_list;
93 int nfds_on_list;
94
95 fd_set zero;
96
97 /*
98 * Check for invalid conditions at outset.
99 * Required for spec1170.
100 * SUSV3: We must behave as a cancellation point even if we fail early.
101 */
102 if (nfds >= 0 && nfds <= FD_SETSIZE) {
103 if (tsp != NULL) {
104 if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
105 tsp->tv_sec < 0) {
106 pthread_testcancel();
107 errno = EINVAL;
108 return (-1);
109 }
110 }
111 } else {
112 pthread_testcancel();
113 errno = EINVAL;
114 return (-1);
115 }
116
117 /*
118 * If any input args are null, point them at the null array.
119 */
120 (void) memset(&zero, 0, sizeof (fd_set));
121 if (in0 == NULL)
122 in0 = &zero;
123 if (out0 == NULL)
124 out0 = &zero;
125 if (ex0 == NULL)
126 ex0 = &zero;
127
128 nfds_on_list = DEFAULT_POLL_SIZE;
129 pfd_list = pfd;
130 p = pfd_list;
131 (void) memset(pfd, 0, sizeof (pfd));
132 /*
133 * For each fd, if any bits are set convert them into
134 * the appropriate pollfd struct.
135 */
136 in = (long *)in0->fds_bits;
137 out = (long *)out0->fds_bits;
138 ex = (long *)ex0->fds_bits;
139 nused = 0;
140 /*
141 * nused reflects the number of pollfd structs currently used
142 * less one. If realloc_fds returns NULL it is because malloc
143 * failed. We expect malloc() to have done the proper
144 * thing with errno.
145 */
146 for (n = 0; n < nfds; n += NFDBITS) {
147 b = (ulong_t)(*in | *out | *ex);
148 for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
149 if (b & 1) {
150 p->fd = n + j;
151 if (p->fd < nfds) {
152 p->events = 0;
153 if (*in & m)
154 p->events |= POLLRDNORM;
155 if (*out & m)
156 p->events |= POLLWRNORM;
157 if (*ex & m)
158 p->events |= POLLRDBAND;
159 if (nused < (nfds_on_list - 1)) {
160 p++;
161 } else if ((p = realloc_fds(
162 &nfds_on_list, &pfd_list, pfd))
163 == NULL) {
164 if (pfd_list != pfd)
165 free(pfd_list);
166 pthread_testcancel();
167 return (-1);
168 }
169 nused++;
170 } else
171 goto done;
172 }
173 }
174 in++;
175 out++;
176 ex++;
177 }
178 done:
179 /*
180 * Now do the poll.
181 */
182 do {
183 rv = _pollsys(pfd_list, (nfds_t)nused, tsp, sigmask);
184 } while (rv < 0 && errno == EAGAIN);
185
186 if (rv < 0) { /* no need to set bit masks */
187 if (pfd_list != pfd)
188 free(pfd_list);
189 return (rv);
190 } else if (rv == 0) {
191 /*
192 * Clear out bit masks, just in case.
193 * On the assumption that usually only
194 * one bit mask is set, use three loops.
195 */
196 if (in0 != &zero) {
197 in = (long *)in0->fds_bits;
198 for (n = 0; n < nfds; n += NFDBITS)
199 *in++ = 0;
200 }
201 if (out0 != &zero) {
202 out = (long *)out0->fds_bits;
203 for (n = 0; n < nfds; n += NFDBITS)
204 *out++ = 0;
205 }
206 if (ex0 != &zero) {
207 ex = (long *)ex0->fds_bits;
208 for (n = 0; n < nfds; n += NFDBITS)
209 *ex++ = 0;
210 }
211 if (pfd_list != pfd)
212 free(pfd_list);
213 return (0);
214 }
215
216 /*
217 * Check for EINVAL error case first to avoid changing any bits
218 * if we're going to return an error.
219 */
220 for (p = pfd_list, j = nused; j-- > 0; p++) {
221 /*
222 * select will return EBADF immediately if any fd's
223 * are bad. poll will complete the poll on the
224 * rest of the fd's and include the error indication
225 * in the returned bits. This is a rare case so we
226 * accept this difference and return the error after
227 * doing more work than select would've done.
228 */
229 if (p->revents & POLLNVAL) {
230 errno = EBADF;
231 if (pfd_list != pfd)
232 free(pfd_list);
233 return (-1);
234 }
235 /*
236 * We would like to make POLLHUP available to select,
237 * checking to see if we have pending data to be read.
238 * BUT until we figure out how not to break Xsun's
239 * dependencies on select's existing features...
240 * This is what we _thought_ would work ... sigh!
241 */
242 /*
243 * if ((p->revents & POLLHUP) &&
244 * !(p->revents & (POLLRDNORM|POLLRDBAND))) {
245 * errno = EINTR;
246 * return (-1);
247 * }
248 */
249 }
250
251 /*
252 * Convert results of poll back into bits
253 * in the argument arrays.
254 *
255 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
256 * on return from poll if they were set on input, thus we don't
257 * worry about accidentally setting the corresponding bits in the
258 * zero array if the input bit masks were null.
259 *
260 * Must return number of bits set, not number of ready descriptors
261 * (as the man page says, and as poll() does).
262 */
263 rv = 0;
264 for (p = pfd_list; nused-- > 0; p++) {
265 j = (int)(p->fd / NFDBITS);
266 /* have we moved into another word of the bit mask yet? */
267 if (j != lastj) {
268 /* clear all output bits to start with */
269 in = (long *)&in0->fds_bits[j];
270 out = (long *)&out0->fds_bits[j];
271 ex = (long *)&ex0->fds_bits[j];
272 /*
273 * In case we made "zero" read-only (e.g., with
274 * cc -R), avoid actually storing into it.
275 */
276 if (in0 != &zero)
277 *in = 0;
278 if (out0 != &zero)
279 *out = 0;
280 if (ex0 != &zero)
281 *ex = 0;
282 lastj = j;
283 }
284 if (p->revents) {
285 m = 1L << (p->fd % NFDBITS);
286 if (p->revents & POLLRDNORM) {
287 *in |= m;
288 rv++;
289 }
290 if (p->revents & POLLWRNORM) {
291 *out |= m;
292 rv++;
293 }
294 if (p->revents & POLLRDBAND) {
295 *ex |= m;
296 rv++;
297 }
298 /*
299 * Only set this bit on return if we asked about
300 * input conditions.
301 */
302 if ((p->revents & (POLLHUP|POLLERR)) &&
303 (p->events & POLLRDNORM)) {
304 if ((*in & m) == 0)
305 rv++; /* wasn't already set */
306 *in |= m;
307 }
308 /*
309 * Only set this bit on return if we asked about
310 * output conditions.
311 */
312 if ((p->revents & (POLLHUP|POLLERR)) &&
313 (p->events & POLLWRNORM)) {
314 if ((*out & m) == 0)
315 rv++; /* wasn't already set */
316 *out |= m;
317 }
318 /*
319 * Only set this bit on return if we asked about
320 * output conditions.
321 */
322 if ((p->revents & (POLLHUP|POLLERR)) &&
323 (p->events & POLLRDBAND)) {
324 if ((*ex & m) == 0)
325 rv++; /* wasn't already set */
326 *ex |= m;
327 }
328 }
329 }
330 if (pfd_list != pfd)
331 free(pfd_list);
332 return (rv);
333 }
334
335 int
select_large_fdset(int nfds,fd_set * in0,fd_set * out0,fd_set * ex0,struct timeval * tv)336 select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
337 struct timeval *tv)
338 {
339 timespec_t ts;
340 timespec_t *tsp;
341
342 if (tv == NULL)
343 tsp = NULL;
344 else {
345 /* check timeval validity */
346 if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
347 errno = EINVAL;
348 return (-1);
349 }
350 /*
351 * Convert timeval to timespec.
352 * To preserve compatibility with past behavior,
353 * when select was built upon poll(2), which has a
354 * minimum non-zero timeout of 1 millisecond, force
355 * a minimum non-zero timeout of 500 microseconds.
356 */
357 ts.tv_sec = tv->tv_sec;
358 ts.tv_nsec = tv->tv_usec * 1000;
359 if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
360 ts.tv_nsec = 500000;
361 tsp = &ts;
362 }
363
364 return (pselect_large_fdset(nfds, in0, out0, ex0, tsp, NULL));
365 }
366
/*
 * Grow the pollfd list to twice its current capacity.
 *
 *	num		in: current capacity in entries;
 *			out: the doubled capacity (on success only).
 *	list_head	in: the current buffer;
 *			out: the new heap buffer (on success only).
 *	orig		the caller's initial buffer; it is never freed here.
 *
 * The existing entries are copied to the front of the new buffer and the
 * remaining entries are zeroed.  The old buffer is freed unless it is orig.
 * Returns a pointer to the first unused entry of the new buffer, or NULL
 * if malloc() failed, in which case *num and *list_head are left alone.
 */
struct pollfd *
realloc_fds(int *num, struct pollfd **list_head, struct pollfd *orig)
{
	struct pollfd *old_list = *list_head;
	struct pollfd *new_list;
	int old_count = *num;
	int new_count = old_count * 2;
	int new_bytes = new_count * sizeof (struct pollfd);

	new_list = malloc(new_bytes);
	if (new_list == NULL)
		return (NULL);

	/* zero everything, then lay the old entries over the first half */
	(void) memset(new_list, 0, (size_t)new_bytes);
	(void) memcpy(new_list, old_list, new_bytes / 2);

	if (old_list != orig)
		free(old_list);

	*list_head = new_list;
	*num = new_count;
	return (new_list + old_count);
}
393