xref: /titanic_50/usr/src/lib/libc/port/gen/select_large_fdset.c (revision 7257d1b4d25bfac0c802847390e98a464fd787ac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * Emulation of select() system call using _pollsys() system call.
34  *
35  * Assumptions:
36  *	polling for input only is most common.
37  *	polling for exceptional conditions is very rare.
38  *
39  * Note that it is not feasible to emulate all error conditions,
40  * in particular conditions that would return EFAULT are far too
41  * difficult to check for in a library routine.
42  *
43  * This is the alternate large fd_set select.
44  *
45  */
46 
/*
 * Select the large fd_set variant by forcing FD_SETSIZE to 65536.
 * This must precede any include files so that no header is processed
 * with a different value (presumably the system headers size fd_set
 * from FD_SETSIZE -- confirm against the platform's select header).
 *
 * #undef of a name that is not currently a macro is a no-op, so no
 * #ifdef guard is required around it.
 */
#undef	FD_SETSIZE
#define	FD_SETSIZE 65536
54 
55 #include "lint.h"
56 #include <values.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #include <pthread.h>
60 #include <errno.h>
61 #include <sys/time.h>
62 #include <sys/types.h>
63 #include <sys/poll.h>
64 #include <string.h>
65 #include <stdlib.h>
66 #include "libc.h"
67 
/*
 * Number of pollfd entries held in the on-stack array used by
 * pselect_large_fdset() before the list is moved to the heap.
 */
#define	DEFAULT_POLL_SIZE 64

/*
 * Doubles the pollfd list: num is the current capacity (updated in place),
 * list_head the current buffer (updated in place), orig the on-stack
 * buffer that must never be freed.  Defined at the end of this file.
 */
static struct pollfd *realloc_fds(int *num, struct pollfd **list_head,
    struct pollfd *orig);
71 
72 int
pselect_large_fdset(int nfds,fd_set * in0,fd_set * out0,fd_set * ex0,const timespec_t * tsp,const sigset_t * sigmask)73 pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
74 	const timespec_t *tsp, const sigset_t *sigmask)
75 {
76 	long *in, *out, *ex;
77 	ulong_t m;	/* bit mask */
78 	int j;		/* loop counter */
79 	ulong_t b;	/* bits to test */
80 	int n, rv;
81 	int lastj = -1;
82 	int nused;
83 
84 	/*
85 	 * Rather than have a mammoth pollfd (65K) list on the stack
86 	 * we start with a small one and then malloc larger chunks
87 	 * on the heap if necessary.
88 	 */
89 
90 	struct pollfd pfd[DEFAULT_POLL_SIZE];
91 	struct pollfd *p;
92 	struct pollfd *pfd_list;
93 	int nfds_on_list;
94 
95 	fd_set zero;
96 
97 	/*
98 	 * Check for invalid conditions at outset.
99 	 * Required for spec1170.
100 	 * SUSV3: We must behave as a cancellation point even if we fail early.
101 	 */
102 	if (nfds >= 0 && nfds <= FD_SETSIZE) {
103 		if (tsp != NULL) {
104 			if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
105 			    tsp->tv_sec < 0) {
106 				pthread_testcancel();
107 				errno = EINVAL;
108 				return (-1);
109 			}
110 		}
111 	} else {
112 		pthread_testcancel();
113 		errno = EINVAL;
114 		return (-1);
115 	}
116 
117 	/*
118 	 * If any input args are null, point them at the null array.
119 	 */
120 	(void) memset(&zero, 0, sizeof (fd_set));
121 	if (in0 == NULL)
122 		in0 = &zero;
123 	if (out0 == NULL)
124 		out0 = &zero;
125 	if (ex0 == NULL)
126 		ex0 = &zero;
127 
128 	nfds_on_list = DEFAULT_POLL_SIZE;
129 	pfd_list = pfd;
130 	p = pfd_list;
131 	(void) memset(pfd, 0, sizeof (pfd));
132 	/*
133 	 * For each fd, if any bits are set convert them into
134 	 * the appropriate pollfd struct.
135 	 */
136 	in = (long *)in0->fds_bits;
137 	out = (long *)out0->fds_bits;
138 	ex = (long *)ex0->fds_bits;
139 	nused = 0;
140 	/*
141 	 * nused reflects the number of pollfd structs currently used
142 	 * less one. If realloc_fds returns NULL it is because malloc
143 	 * failed. We expect malloc() to have done the proper
144 	 * thing with errno.
145 	 */
146 	for (n = 0; n < nfds; n += NFDBITS) {
147 		b = (ulong_t)(*in | *out | *ex);
148 		for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
149 			if (b & 1) {
150 				p->fd = n + j;
151 				if (p->fd < nfds) {
152 					p->events = 0;
153 					if (*in & m)
154 						p->events |= POLLRDNORM;
155 					if (*out & m)
156 						p->events |= POLLWRNORM;
157 					if (*ex & m)
158 						p->events |= POLLRDBAND;
159 					if (nused < (nfds_on_list - 1)) {
160 						p++;
161 					} else if ((p = realloc_fds(
162 					    &nfds_on_list, &pfd_list, pfd))
163 					    == NULL) {
164 						if (pfd_list != pfd)
165 							free(pfd_list);
166 						pthread_testcancel();
167 						return (-1);
168 					}
169 					nused++;
170 				} else
171 					goto done;
172 			}
173 		}
174 		in++;
175 		out++;
176 		ex++;
177 	}
178 done:
179 	/*
180 	 * Now do the poll.
181 	 */
182 	do {
183 		rv = _pollsys(pfd_list, (nfds_t)nused, tsp, sigmask);
184 	} while (rv < 0 && errno == EAGAIN);
185 
186 	if (rv < 0) {		/* no need to set bit masks */
187 		if (pfd_list != pfd)
188 			free(pfd_list);
189 		return (rv);
190 	} else if (rv == 0) {
191 		/*
192 		 * Clear out bit masks, just in case.
193 		 * On the assumption that usually only
194 		 * one bit mask is set, use three loops.
195 		 */
196 		if (in0 != &zero) {
197 			in = (long *)in0->fds_bits;
198 			for (n = 0; n < nfds; n += NFDBITS)
199 				*in++ = 0;
200 		}
201 		if (out0 != &zero) {
202 			out = (long *)out0->fds_bits;
203 			for (n = 0; n < nfds; n += NFDBITS)
204 				*out++ = 0;
205 		}
206 		if (ex0 != &zero) {
207 			ex = (long *)ex0->fds_bits;
208 			for (n = 0; n < nfds; n += NFDBITS)
209 				*ex++ = 0;
210 		}
211 		if (pfd_list != pfd)
212 			free(pfd_list);
213 		return (0);
214 	}
215 
216 	/*
217 	 * Check for EINVAL error case first to avoid changing any bits
218 	 * if we're going to return an error.
219 	 */
220 	for (p = pfd_list, j = nused; j-- > 0; p++) {
221 		/*
222 		 * select will return EBADF immediately if any fd's
223 		 * are bad.  poll will complete the poll on the
224 		 * rest of the fd's and include the error indication
225 		 * in the returned bits.  This is a rare case so we
226 		 * accept this difference and return the error after
227 		 * doing more work than select would've done.
228 		 */
229 		if (p->revents & POLLNVAL) {
230 			errno = EBADF;
231 			if (pfd_list != pfd)
232 				free(pfd_list);
233 			return (-1);
234 		}
235 		/*
236 		 * We would like to make POLLHUP available to select,
237 		 * checking to see if we have pending data to be read.
238 		 * BUT until we figure out how not to break Xsun's
239 		 * dependencies on select's existing features...
240 		 * This is what we _thought_ would work ... sigh!
241 		 */
242 		/*
243 		 * if ((p->revents & POLLHUP) &&
244 		 *	!(p->revents & (POLLRDNORM|POLLRDBAND))) {
245 		 *	errno = EINTR;
246 		 *	return (-1);
247 		 * }
248 		 */
249 	}
250 
251 	/*
252 	 * Convert results of poll back into bits
253 	 * in the argument arrays.
254 	 *
255 	 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
256 	 * on return from poll if they were set on input, thus we don't
257 	 * worry about accidentally setting the corresponding bits in the
258 	 * zero array if the input bit masks were null.
259 	 *
260 	 * Must return number of bits set, not number of ready descriptors
261 	 * (as the man page says, and as poll() does).
262 	 */
263 	rv = 0;
264 	for (p = pfd_list; nused-- > 0; p++) {
265 		j = (int)(p->fd / NFDBITS);
266 		/* have we moved into another word of the bit mask yet? */
267 		if (j != lastj) {
268 			/* clear all output bits to start with */
269 			in = (long *)&in0->fds_bits[j];
270 			out = (long *)&out0->fds_bits[j];
271 			ex = (long *)&ex0->fds_bits[j];
272 			/*
273 			 * In case we made "zero" read-only (e.g., with
274 			 * cc -R), avoid actually storing into it.
275 			 */
276 			if (in0 != &zero)
277 				*in = 0;
278 			if (out0 != &zero)
279 				*out = 0;
280 			if (ex0 != &zero)
281 				*ex = 0;
282 			lastj = j;
283 		}
284 		if (p->revents) {
285 			m = 1L << (p->fd % NFDBITS);
286 			if (p->revents & POLLRDNORM) {
287 				*in |= m;
288 				rv++;
289 			}
290 			if (p->revents & POLLWRNORM) {
291 				*out |= m;
292 				rv++;
293 			}
294 			if (p->revents & POLLRDBAND) {
295 				*ex |= m;
296 				rv++;
297 			}
298 			/*
299 			 * Only set this bit on return if we asked about
300 			 * input conditions.
301 			 */
302 			if ((p->revents & (POLLHUP|POLLERR)) &&
303 			    (p->events & POLLRDNORM)) {
304 				if ((*in & m) == 0)
305 					rv++;	/* wasn't already set */
306 				*in |= m;
307 			}
308 			/*
309 			 * Only set this bit on return if we asked about
310 			 * output conditions.
311 			 */
312 			if ((p->revents & (POLLHUP|POLLERR)) &&
313 			    (p->events & POLLWRNORM)) {
314 				if ((*out & m) == 0)
315 					rv++;	/* wasn't already set */
316 				*out |= m;
317 			}
318 			/*
319 			 * Only set this bit on return if we asked about
320 			 * output conditions.
321 			 */
322 			if ((p->revents & (POLLHUP|POLLERR)) &&
323 			    (p->events & POLLRDBAND)) {
324 				if ((*ex & m) == 0)
325 					rv++;   /* wasn't already set */
326 				*ex |= m;
327 			}
328 		}
329 	}
330 	if (pfd_list != pfd)
331 		free(pfd_list);
332 	return (rv);
333 }
334 
335 int
select_large_fdset(int nfds,fd_set * in0,fd_set * out0,fd_set * ex0,struct timeval * tv)336 select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
337 	struct timeval *tv)
338 {
339 	timespec_t ts;
340 	timespec_t *tsp;
341 
342 	if (tv == NULL)
343 		tsp = NULL;
344 	else {
345 		/* check timeval validity */
346 		if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
347 			errno = EINVAL;
348 			return (-1);
349 		}
350 		/*
351 		 * Convert timeval to timespec.
352 		 * To preserve compatibility with past behavior,
353 		 * when select was built upon poll(2), which has a
354 		 * minimum non-zero timeout of 1 millisecond, force
355 		 * a minimum non-zero timeout of 500 microseconds.
356 		 */
357 		ts.tv_sec = tv->tv_sec;
358 		ts.tv_nsec = tv->tv_usec * 1000;
359 		if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
360 			ts.tv_nsec = 500000;
361 		tsp = &ts;
362 	}
363 
364 	return (pselect_large_fdset(nfds, in0, out0, ex0, tsp, NULL));
365 }
366 
/*
 * Double the capacity of the pollfd list.
 *
 *	num		in: current capacity in entries; out: new capacity
 *	list_head	in: current buffer; out: the newly allocated buffer
 *	orig		the caller's original (non-heap) buffer
 *
 * A new buffer of twice the size is obtained from malloc().  The existing
 * entries are copied to its front and the remaining half is zero-filled.
 * The old buffer is freed unless it is "orig".
 *
 * Returns a pointer to the first unused entry of the new buffer, or NULL
 * if malloc() fails, in which case *num and *list_head are left untouched
 * and the old buffer remains valid.
 */
static struct pollfd *
realloc_fds(int *num, struct pollfd **list_head, struct pollfd *orig)
{
	size_t old_cnt = (size_t)*num;
	size_t old_size = old_cnt * sizeof (struct pollfd);
	struct pollfd *b;

	b = malloc(2 * old_size);
	if (b == NULL)
		return (NULL);

	/* carry over the live entries; only the new tail needs zeroing */
	(void) memcpy(b, *list_head, old_size);
	(void) memset(b + old_cnt, 0, old_size);

	if (*list_head != orig)
		free(*list_head);
	*list_head = b;
	*num *= 2;

	return (b + old_cnt);
}
393