xref: /illumos-gate/usr/src/lib/brand/solaris10/s10_brand/common/s10_brand.c (revision 2dd5848fa9da42f374782814f362e0afda124ecd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <dirent.h>
30 #include <stddef.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <strings.h>
34 #include <unistd.h>
35 #include <thread.h>
36 #include <sys/auxv.h>
37 #include <sys/brand.h>
38 #include <sys/inttypes.h>
39 #include <sys/lwp.h>
40 #include <sys/syscall.h>
41 #include <sys/systm.h>
42 #include <sys/utsname.h>
43 #include <sys/sysconfig.h>
44 #include <sys/systeminfo.h>
45 #include <sys/zone.h>
46 #include <sys/stat.h>
47 #include <sys/mntent.h>
48 #include <sys/ctfs.h>
49 #include <sys/priv.h>
50 #include <sys/acctctl.h>
51 #include <libgen.h>
52 #include <bsm/audit.h>
53 #include <sys/crypto/ioctl.h>
54 #include <sys/fs/zfs.h>
55 #include <sys/zfs_ioctl.h>
56 #include <sys/ucontext.h>
57 #include <sys/mntio.h>
58 #include <sys/mnttab.h>
59 #include <sys/attr.h>
60 #include <atomic.h>
61 
62 #include <s10_brand.h>
63 #include <s10_misc.h>
64 #include <s10_signal.h>
65 
66 /*
67  * Principles of emulation 101.
68  *
69  *
70  * *** Setting errno
71  *
 * Just don't do it.  This emulation library is loaded onto a
 * separate link map from the application whose address space we're
 * running in.  We have our own private copy of libc, and therefore
 * the errno value accessible from here is also private; changing
 * it will not affect any errno value that the process whose address
 * space we are running in will see.  To return an error condition we
 * should return the negated errno value we'd like the system to return.
 * For more information about this see the comment in s10_handler().
 * Basically, when we return to the caller that initiated the system
 * call it's their responsibility to set errno.
82  *
83  *
84  * *** Recursion Considerations
85  *
86  * When emulating system calls we need to be very careful about what
87  * library calls we invoke.  Library calls should be kept to a minimum.
88  * One issue is that library calls can invoke system calls, so if we're
89  * emulating a system call and we invoke a library call that depends on
90  * that system call we will probably enter a recursive loop, which would
91  * be bad.
92  *
93  *
94  * *** Return Values.
95  *
 * When declaring new syscall emulation functions, it is very important
 * to set the proper RV_* flags in the s10_sysent_table.  Upon failure,
 * syscall emulation functions should return an errno value.  Upon success,
 * syscall emulation functions should return 0 and set the sysret_t return
 * value parameters accordingly.
101  *
102  * There are five possible syscall macro wrappers used in the kernel's system
103  * call sysent table.  These turn into the following return values:
104  *	SYSENT_CL	-> SYSENT_C or SYSENT_CI
105  *	SYSENT_C	SE_64RVAL		RV_DEFAULT
106  *	SYSENT_CI	SE_32RVAL1		RV_DEFAULT
107  *	SYSENT_2CI	SE_32RVAL1|SE_32RVAL2	RV_32RVAL2
108  *	SYSENT_AP	SE_64RVAL		RV_64RVAL
109  *
110  *
111  * *** Agent lwp considerations
112  *
 * It is currently impossible to do any emulation for system calls
 * when they are being invoked on behalf of an agent lwp.  To understand why
 * it's impossible you have to understand how agent lwp syscalls work.
 *
 * The agent lwp syscall process works as follows:
 *   1  The controlling process stops the target.
 *   2  The controlling process injects an agent lwp which is also stopped.
 *      This agent lwp assumes the userland stack and register values
 *      of another stopped lwp in the current process.
 *   3  The controlling process configures the agent lwp to start
 *      executing the requested system call.
 *   4  The controlling process configures /proc to stop the agent lwp when
 *      it enters the requested system call.
 *   5  The controlling process allows the agent lwp to start executing.
 *   6  The agent lwp traps into the kernel to perform the requested system
 *      call and immediately stops.
 *   7  The controlling process copies all the arguments for the requested
 *      system call onto the agent lwp's stack.
 *   8  The controlling process configures /proc to stop the agent lwp
 *      when it completes the requested system call.
 *   9  The controlling process allows the agent lwp to start executing.
 *  10  The agent lwp executes the system call and then stops before
 *      returning to userland.
 *  11  The controlling process copies the return value and return arguments
 *      back from the agent lwp's stack.
 *  12  The controlling process destroys the agent lwp and restarts
 *      the target process.
140  *
 * The fundamental problem is that when the agent executes the requested
 * system call in step 5, if we're emulating that system call then the
 * lwp is redirected back to our emulation layer without blocking
 * in the kernel.  But our emulation layer can't access the arguments
 * for the system call because they haven't been copied to the stack
 * yet and they still only exist in the controlling process's address
 * space.  This prevents us from being able to do any emulation of
 * agent lwp system calls.  Hence, currently our brand trap interposition
 * callback (s10_brand_syscall_callback_common) will detect if a system
 * call is being made by an agent lwp, and if this is the case it will
 * never redirect the system call to this emulation library.
152  *
 * In the future, if this proves to be a problem, the easiest solution
 * would probably be to replace the branded versions of these applications
 * with their native counterparts.  I.e., truss, plimit, and pfiles could be
 * replaced with wrapper scripts that execute the native versions of these
 * applications.  In the case of plimit and pfiles this should be pretty
 * straightforward.  Truss would probably be more tricky since it can
 * execute applications which would be branded applications, so in that
 * case it might be necessary to create a loadable library which could
 * be LD_PRELOADed into truss and this library would interpose on the
 * exec() system call to allow truss to correctly execute branded
 * processes.  It should be pointed out that this solution could work
 * because "native agent lwps" (i.e., agent lwps created by native
 * processes) can be treated differently from "branded agent lwps" (i.e.,
 * agent lwps created by branded processes), since native agent lwps
 * would presumably be making native system calls and hence not need
 * any interposition.
169  *
170  */
171 
172 static zoneid_t zoneid;
173 static boolean_t emul_global_zone = B_FALSE;
174 static s10_emul_bitmap_t emul_bitmap;
175 pid_t zone_init_pid;
176 
177 /*
178  * S10_FEATURE_IS_PRESENT is a macro that helps facilitate conditional
179  * emulation.  For each constant N defined in the s10_emulated_features
180  * enumeration in usr/src/uts/common/brand/solaris10/s10_brand.h,
181  * S10_FEATURE_IS_PRESENT(N) is true iff the feature/backport represented by N
182  * is present in the Solaris 10 image hosted within the zone.  In other words,
183  * S10_FEATURE_IS_PRESENT(N) is true iff the file /usr/lib/brand/solaris10/M,
184  * where M is the enum value of N, was present in the zone when the zone booted.
185  *
186  *
187  * *** Sample Usage
188  *
189  * Suppose that you need to backport a fix to Solaris 10 and there is
190  * emulation in place for the fix.  Suppose further that the emulation won't be
191  * needed if the fix is backported (i.e., if the fix is present in the hosted
192  * Solaris 10 environment, then the brand won't need the emulation).  Then if
193  * you add a constant named "S10_FEATURE_X" to the end of the
194  * s10_emulated_features enumeration that represents the backported fix and
195  * S10_FEATURE_X evaluates to four, then you should create a file named
196  * /usr/lib/brand/solaris10/4 as part of your backport.  Additionally, you
197  * should retain the aforementioned emulation but modify it so that it's
198  * performed only when S10_FEATURE_IS_PRESENT(S10_FEATURE_X) is false.  Thus the
199  * emulation function should look something like the following:
200  *
201  *	static int
202  *	my_emul_function(sysret_t *rv, ...)
203  *	{
204  *		if (S10_FEATURE_IS_PRESENT(S10_FEATURE_X)) {
205  *			// Don't emulate
206  *			return (__systemcall(rv, ...));
207  *		} else {
208  *			// Emulate whatever needs to be emulated when the
209  *			// backport isn't present in the Solaris 10 image.
210  *		}
211  *	}
212  */
/* Feature N is bit (N & 0x7) of byte (N >> 3) in emul_bitmap. */
#define	S10_FEATURE_IS_PRESENT(feature)					\
	(0 != (emul_bitmap[(feature) >> 3] & (1 << ((feature) & 0x7))))
216 
/*
 * Every emulated system call is described by one s10_sysent_table entry:
 * the emulation callback plus a word holding the argument count and the
 * RV_* return-value flags.  The callback type is intentionally left
 * unprototyped because callbacks are invoked with differing numbers of
 * arguments.
 */
typedef long (*sysent_cb_t)();
typedef struct s10_sysent_table {
	sysent_cb_t	st_callc;	/* emulation callback */
	uintptr_t	st_args;	/* argument count and RV_* flags */
} s10_sysent_table_t;
s10_sysent_table_t s10_sysent_table[];

/* Initializer for a table entry that dispatches to `callback'. */
#define	EMULATE(callback, argflags)	\
	{ (sysent_cb_t)(callback), (argflags) }
/* Initializer for a table entry that is routed to s10_unimpl(). */
#define	NOSYS			EMULATE(s10_unimpl, (0 | RV_DEFAULT))

/* Solaris 10 release and version strings. */
#define	S10_UTS_RELEASE	"5.10"
#define	S10_UTS_VERSION	"Generic_Virtual"
229 
230 /*LINTED: static unused*/
231 static volatile int		s10_abort_err;
232 /*LINTED: static unused*/
233 static volatile const char	*s10_abort_msg;
234 /*LINTED: static unused*/
235 static volatile const char	*s10_abort_file;
236 /*LINTED: static unused*/
237 static volatile int		s10_abort_line;
238 
239 extern int errno;
240 
241 /*ARGSUSED*/
242 void
243 _s10_abort(int err, const char *msg, const char *file, int line)
244 {
245 	sysret_t rval;
246 
247 	/* Save the error message into convenient globals */
248 	s10_abort_err = err;
249 	s10_abort_msg = msg;
250 	s10_abort_file = file;
251 	s10_abort_line = line;
252 
253 	/* kill ourselves */
254 	abort();
255 
256 	/* If abort() didn't work, try something stronger. */
257 	(void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGKILL);
258 }
259 
260 int
261 s10_uucopy(const void *from, void *to, size_t size)
262 {
263 	sysret_t rval;
264 
265 	if (__systemcall(&rval, SYS_uucopy + 1024, from, to, size) != 0)
266 		return (EFAULT);
267 	return (0);
268 }
269 
270 /*
271  * ATTENTION: uucopystr() does NOT ensure that string are null terminated!
272  */
273 int
274 s10_uucopystr(const void *from, void *to, size_t size)
275 {
276 	sysret_t rval;
277 
278 	if (__systemcall(&rval, SYS_uucopystr + 1024, from, to, size) != 0)
279 		return (EFAULT);
280 	return (0);
281 }
282 
283 /*
284  * Figures out the PID of init for the zone.  Also returns a boolean
285  * indicating whether this process currently has that pid: if so,
286  * then at this moment, we are init.
287  */
288 static boolean_t
289 get_initpid_info(void)
290 {
291 	pid_t pid;
292 	sysret_t rval;
293 	int err;
294 
295 	/*
296 	 * Determine the current process PID and the PID of the zone's init.
297 	 * We use care not to call getpid() here, because we're not supposed
298 	 * to call getpid() until after the program is fully linked-- the
299 	 * first call to getpid() is a signal from the linker to debuggers
300 	 * that linking has been completed.
301 	 */
302 	if ((err = __systemcall(&rval, SYS_brand,
303 	    B_S10_PIDINFO, &pid, &zone_init_pid)) != 0) {
304 		s10_abort(err, "Failed to get init's pid");
305 	}
306 
307 	/*
308 	 * Note that we need to be cautious with the pid we get back--
309 	 * it should not be stashed and used in place of getpid(), since
310 	 * we might fork(2).  So we keep zone_init_pid and toss the pid
311 	 * we otherwise got.
312 	 */
313 	if (pid == zone_init_pid)
314 		return (B_TRUE);
315 
316 	return (B_FALSE);
317 }
318 
319 /*
320  * This function is defined to be NOSYS but it won't be called from the
321  * the kernel since the NOSYS system calls are not enabled in the kernel.
322  * Thus, the only time this function is called is directly from within the
323  * indirect system call path.
324  */
325 /*ARGSUSED*/
326 static long
327 s10_unimpl(sysret_t *rv, uintptr_t p1)
328 {
329 	sysret_t rval;
330 
331 	/*
332 	 * We'd like to print out some kind of error message here like
333 	 * "unsupported syscall", but we can't because it's not safe to
334 	 * assume that stderr or STDERR_FILENO actually points to something
335 	 * that is a terminal, and if we wrote to those files we could
336 	 * inadvertantly write to some applications open files, which would
337 	 * be bad.
338 	 *
339 	 * Normally, if an application calls an invalid system call
340 	 * it get a SIGSYS sent to it.  So we'll just go ahead and send
341 	 * ourselves a signal here.  Note that this is far from ideal since
342 	 * if the application has registered a signal handler, that signal
343 	 * handler may recieve a ucontext_t as the third parameter to
344 	 * indicate the context of the process when the signal was
345 	 * generated, and in this case that context will not be what the
346 	 * application is expecting.  Hence, we should probably create a
347 	 * brandsys() kernel function that can deliver the signal to us
348 	 * with the correct ucontext_t.
349 	 */
350 	(void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGSYS);
351 	return (ENOSYS);
352 }
353 
#if defined(__sparc) && !defined(__sparcv9)
/*
 * Yuck.  For 32-bit sparc applications, handle indirect system calls.
 *
 * `code' selects the s10_sysent_table entry to dispatch to and a0..a7 are
 * the arguments of the system call being made indirectly.  This interface
 * is declared with the maximum number of system call arguments; if the
 * target system call takes fewer, the surplus arguments are garbage, but
 * they are never passed on, so that is harmless.
 *
 * Returns whatever the selected emulation callback returns.  Aborts if
 * `code' is outside the table or the table entry carries an argument count
 * that is not handled here.
 */
static long
s10_indir(sysret_t *rv, int code,
    uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4,
    uintptr_t a5, uintptr_t a6, uintptr_t a7)
{
	s10_sysent_table_t *sst;

	/* Validate the code before it is used as a table index. */
	s10_assert(code >= 0 && code < NSYSCALL);
	sst = &(s10_sysent_table[code]);

	/* Pass exactly as many arguments as the table entry declares. */
	switch (sst->st_args & NARGS_MASK) {
	case 0:
		return ((sst->st_callc)(rv));
	case 1:
		return ((sst->st_callc)(rv, a0));
	case 2:
		return ((sst->st_callc)(rv, a0, a1));
	case 3:
		return ((sst->st_callc)(rv, a0, a1, a2));
	case 4:
		return ((sst->st_callc)(rv, a0, a1, a2, a3));
	case 5:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4));
	case 6:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5));
	case 7:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6));
	case 8:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6, a7));
	}
	s10_abort(0, "invalid entry in s10_sysent_table");
	return (EINVAL);
}
#endif /* __sparc && !__sparcv9 */
394 
395 /* Free the thread-local storage provided by mntfs_get_mntentbuf(). */
396 static void
397 mntfs_free_mntentbuf(void *arg)
398 {
399 	struct mntentbuf *embufp = arg;
400 
401 	if (embufp == NULL)
402 		return;
403 	if (embufp->mbuf_emp)
404 		free(embufp->mbuf_emp);
405 	if (embufp->mbuf_buf)
406 		free(embufp->mbuf_buf);
407 	bzero(embufp, sizeof (struct mntentbuf));
408 	free(embufp);
409 }
410 
411 /* Provide the thread-local storage required by mntfs_ioctl(). */
412 static struct mntentbuf *
413 mntfs_get_mntentbuf(size_t size)
414 {
415 	static mutex_t keylock;
416 	static thread_key_t key;
417 	static int once_per_keyname = 0;
418 	void *tsd = NULL;
419 	struct mntentbuf *embufp;
420 
421 	/* Create the key. */
422 	if (!once_per_keyname) {
423 		(void) mutex_lock(&keylock);
424 		if (!once_per_keyname) {
425 			if (thr_keycreate(&key, mntfs_free_mntentbuf)) {
426 				(void) mutex_unlock(&keylock);
427 				return (NULL);
428 			} else {
429 				once_per_keyname++;
430 			}
431 		}
432 		(void) mutex_unlock(&keylock);
433 	}
434 
435 	/*
436 	 * The thread-specific datum for this key is the address of a struct
437 	 * mntentbuf. If this is the first time here then we allocate the struct
438 	 * and its contents, and associate its address with the thread; if there
439 	 * are any problems then we abort.
440 	 */
441 	if (thr_getspecific(key, &tsd))
442 		return (NULL);
443 	if (tsd == NULL) {
444 		if (!(embufp = calloc(1, sizeof (struct mntentbuf))) ||
445 		    !(embufp->mbuf_emp = malloc(sizeof (struct extmnttab))) ||
446 		    thr_setspecific(key, embufp)) {
447 			mntfs_free_mntentbuf(embufp);
448 			return (NULL);
449 		}
450 	} else {
451 		embufp = tsd;
452 	}
453 
454 	/* Return the buffer, resizing it if necessary. */
455 	if (size > embufp->mbuf_bufsize) {
456 		if (embufp->mbuf_buf)
457 			free(embufp->mbuf_buf);
458 		if ((embufp->mbuf_buf = malloc(size)) == NULL) {
459 			embufp->mbuf_bufsize = 0;
460 			return (NULL);
461 		} else {
462 			embufp->mbuf_bufsize = size;
463 		}
464 	}
465 	return (embufp);
466 }
467 
468 /*
469  * The MNTIOC_GETMNTENT command in this release differs from that in early
470  * versions of Solaris 10.
471  *
472  * Previously, the command would copy a pointer to a struct extmnttab to an
473  * address provided as an argument. The pointer would be somewhere within a
474  * mapping already present within the user's address space. In addition, the
475  * text to which the struct's members pointed would also be within a
476  * pre-existing mapping. Now, the user is required to allocate memory for both
477  * the struct and the text buffer, and to pass the address of each within a
478  * struct mntentbuf. In order to conceal these details from a Solaris 10 client
479  * we allocate some thread-local storage in which to create the necessary data
480  * structures; this is static, thread-safe memory that will be cleaned up
481  * without the caller's intervention.
482  *
483  * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY are new in this release; they should
484  * not work for older clients.
485  */
486 int
487 mntfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
488 {
489 	int err;
490 	struct stat statbuf;
491 	struct mntentbuf *embufp;
492 	static size_t bufsize = MNT_LINE_MAX;
493 
494 	/* Do not emulate mntfs commands from up-to-date clients. */
495 	if (S10_FEATURE_IS_PRESENT(S10_FEATURE_ALTERED_MNTFS_IOCTL))
496 		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
497 
498 	/* Do not emulate mntfs commands directed at other file systems. */
499 	if ((err = __systemcall(rval, SYS_fstatat + 1024,
500 	    fdes, NULL, &statbuf, 0)) != 0)
501 		return (err);
502 	if (strcmp(statbuf.st_fstype, MNTTYPE_MNTFS) != 0)
503 		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
504 
505 	if (cmd == MNTIOC_GETEXTMNTENT || cmd == MNTIOC_GETMNTANY)
506 		return (EINVAL);
507 
508 	if ((embufp = mntfs_get_mntentbuf(bufsize)) == NULL)
509 		return (ENOMEM);
510 
511 	/*
512 	 * MNTIOC_GETEXTMNTENT advances the file pointer once it has
513 	 * successfully copied out the result to the address provided. We
514 	 * therefore need to check the user-supplied address now since the
515 	 * one we'll be providing is guaranteed to work.
516 	 */
517 	if (s10_uucopy(&embufp->mbuf_emp, (void *)arg, sizeof (void *)) != 0)
518 		return (EFAULT);
519 
520 	/*
521 	 * Keep retrying for as long as we fail for want of a large enough
522 	 * buffer.
523 	 */
524 	for (;;) {
525 		if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes,
526 		    MNTIOC_GETEXTMNTENT, embufp)) != 0)
527 			return (err);
528 
529 		if (rval->sys_rval1 == MNTFS_TOOLONG) {
530 			/* The buffer wasn't large enough. */
531 			(void) atomic_swap_ulong((unsigned long *)&bufsize,
532 			    2 * embufp->mbuf_bufsize);
533 			if ((embufp = mntfs_get_mntentbuf(bufsize)) == NULL)
534 				return (ENOMEM);
535 		} else {
536 			break;
537 		}
538 	}
539 
540 	if (s10_uucopy(&embufp->mbuf_emp, (void *)arg, sizeof (void *)) != 0)
541 		return (EFAULT);
542 
543 	return (0);
544 }
545 
/*
 * Copy the member named `member' (a nested member path such as a.b also
 * works) from structure `src' into the same-named member of structure
 * `dst'.
 */
#define	struct_assign(dst, src, member)	(((dst).member) = ((src).member))
551 
552 /*
553  * The CRYPTO_GET_FUNCTION_LIST parameter structure crypto_function_list_t
554  * changed between S10 and Nevada, so we have to emulate the old S10
555  * crypto_function_list_t structure when interposing on the ioctl syscall.
556  */
557 typedef struct s10_crypto_function_list {
558 	boolean_t fl_digest_init;
559 	boolean_t fl_digest;
560 	boolean_t fl_digest_update;
561 	boolean_t fl_digest_key;
562 	boolean_t fl_digest_final;
563 
564 	boolean_t fl_encrypt_init;
565 	boolean_t fl_encrypt;
566 	boolean_t fl_encrypt_update;
567 	boolean_t fl_encrypt_final;
568 
569 	boolean_t fl_decrypt_init;
570 	boolean_t fl_decrypt;
571 	boolean_t fl_decrypt_update;
572 	boolean_t fl_decrypt_final;
573 
574 	boolean_t fl_mac_init;
575 	boolean_t fl_mac;
576 	boolean_t fl_mac_update;
577 	boolean_t fl_mac_final;
578 
579 	boolean_t fl_sign_init;
580 	boolean_t fl_sign;
581 	boolean_t fl_sign_update;
582 	boolean_t fl_sign_final;
583 	boolean_t fl_sign_recover_init;
584 	boolean_t fl_sign_recover;
585 
586 	boolean_t fl_verify_init;
587 	boolean_t fl_verify;
588 	boolean_t fl_verify_update;
589 	boolean_t fl_verify_final;
590 	boolean_t fl_verify_recover_init;
591 	boolean_t fl_verify_recover;
592 
593 	boolean_t fl_digest_encrypt_update;
594 	boolean_t fl_decrypt_digest_update;
595 	boolean_t fl_sign_encrypt_update;
596 	boolean_t fl_decrypt_verify_update;
597 
598 	boolean_t fl_seed_random;
599 	boolean_t fl_generate_random;
600 
601 	boolean_t fl_session_open;
602 	boolean_t fl_session_close;
603 	boolean_t fl_session_login;
604 	boolean_t fl_session_logout;
605 
606 	boolean_t fl_object_create;
607 	boolean_t fl_object_copy;
608 	boolean_t fl_object_destroy;
609 	boolean_t fl_object_get_size;
610 	boolean_t fl_object_get_attribute_value;
611 	boolean_t fl_object_set_attribute_value;
612 	boolean_t fl_object_find_init;
613 	boolean_t fl_object_find;
614 	boolean_t fl_object_find_final;
615 
616 	boolean_t fl_key_generate;
617 	boolean_t fl_key_generate_pair;
618 	boolean_t fl_key_wrap;
619 	boolean_t fl_key_unwrap;
620 	boolean_t fl_key_derive;
621 
622 	boolean_t fl_init_token;
623 	boolean_t fl_init_pin;
624 	boolean_t fl_set_pin;
625 
626 	boolean_t prov_is_hash_limited;
627 	uint32_t prov_hash_threshold;
628 	uint32_t prov_hash_limit;
629 } s10_crypto_function_list_t;
630 
631 typedef struct s10_crypto_get_function_list {
632 	uint_t				fl_return_value;
633 	crypto_provider_id_t		fl_provider_id;
634 	s10_crypto_function_list_t	fl_list;
635 } s10_crypto_get_function_list_t;
636 
637 /*
638  * The structure returned by the CRYPTO_GET_FUNCTION_LIST ioctl on /dev/crypto
639  * increased in size due to:
640  *	6482533 Threshold for HW offload via PKCS11 interface
641  * between S10 and Nevada.  This is a relatively simple process of filling
642  * in the S10 structure fields with the Nevada data.
643  *
644  * We stat the device to make sure that the ioctl is meant for /dev/crypto.
645  *
646  */
647 static int
648 crypto_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
649 {
650 	int				err;
651 	s10_crypto_get_function_list_t	s10_param;
652 	crypto_get_function_list_t	native_param;
653 	static dev_t			crypto_dev = (dev_t)-1;
654 	struct stat			sbuf;
655 
656 	if (crypto_dev == (dev_t)-1) {
657 		if ((err = __systemcall(rval, SYS_fstatat + 1024,
658 		    AT_FDCWD, "/dev/crypto", &sbuf, 0)) != 0)
659 			goto nonemuioctl;
660 		crypto_dev = major(sbuf.st_rdev);
661 	}
662 	if ((err = __systemcall(rval, SYS_fstatat + 1024,
663 	    fdes, NULL, &sbuf, 0)) != 0)
664 		return (err);
665 	/* Each open fd of /dev/crypto gets a new minor device. */
666 	if (major(sbuf.st_rdev) != crypto_dev)
667 		goto nonemuioctl;
668 
669 	if (s10_uucopy((const void *)arg, &s10_param, sizeof (s10_param)) != 0)
670 		return (EFAULT);
671 	struct_assign(native_param, s10_param, fl_provider_id);
672 	if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd,
673 	    &native_param)) != 0)
674 		return (err);
675 
676 	struct_assign(s10_param, native_param, fl_return_value);
677 	struct_assign(s10_param, native_param, fl_provider_id);
678 
679 	struct_assign(s10_param, native_param, fl_list.fl_digest_init);
680 	struct_assign(s10_param, native_param, fl_list.fl_digest);
681 	struct_assign(s10_param, native_param, fl_list.fl_digest_update);
682 	struct_assign(s10_param, native_param, fl_list.fl_digest_key);
683 	struct_assign(s10_param, native_param, fl_list.fl_digest_final);
684 
685 	struct_assign(s10_param, native_param, fl_list.fl_encrypt_init);
686 	struct_assign(s10_param, native_param, fl_list.fl_encrypt);
687 	struct_assign(s10_param, native_param, fl_list.fl_encrypt_update);
688 	struct_assign(s10_param, native_param, fl_list.fl_encrypt_final);
689 
690 	struct_assign(s10_param, native_param, fl_list.fl_decrypt_init);
691 	struct_assign(s10_param, native_param, fl_list.fl_decrypt);
692 	struct_assign(s10_param, native_param, fl_list.fl_decrypt_update);
693 	struct_assign(s10_param, native_param, fl_list.fl_decrypt_final);
694 
695 	struct_assign(s10_param, native_param, fl_list.fl_mac_init);
696 	struct_assign(s10_param, native_param, fl_list.fl_mac);
697 	struct_assign(s10_param, native_param, fl_list.fl_mac_update);
698 	struct_assign(s10_param, native_param, fl_list.fl_mac_final);
699 
700 	struct_assign(s10_param, native_param, fl_list.fl_sign_init);
701 	struct_assign(s10_param, native_param, fl_list.fl_sign);
702 	struct_assign(s10_param, native_param, fl_list.fl_sign_update);
703 	struct_assign(s10_param, native_param, fl_list.fl_sign_final);
704 	struct_assign(s10_param, native_param, fl_list.fl_sign_recover_init);
705 	struct_assign(s10_param, native_param, fl_list.fl_sign_recover);
706 
707 	struct_assign(s10_param, native_param, fl_list.fl_verify_init);
708 	struct_assign(s10_param, native_param, fl_list.fl_verify);
709 	struct_assign(s10_param, native_param, fl_list.fl_verify_update);
710 	struct_assign(s10_param, native_param, fl_list.fl_verify_final);
711 	struct_assign(s10_param, native_param, fl_list.fl_verify_recover_init);
712 	struct_assign(s10_param, native_param, fl_list.fl_verify_recover);
713 
714 	struct_assign(s10_param, native_param,
715 	    fl_list.fl_digest_encrypt_update);
716 	struct_assign(s10_param, native_param,
717 	    fl_list.fl_decrypt_digest_update);
718 	struct_assign(s10_param, native_param, fl_list.fl_sign_encrypt_update);
719 	struct_assign(s10_param, native_param,
720 	    fl_list.fl_decrypt_verify_update);
721 
722 	struct_assign(s10_param, native_param, fl_list.fl_seed_random);
723 	struct_assign(s10_param, native_param, fl_list.fl_generate_random);
724 
725 	struct_assign(s10_param, native_param, fl_list.fl_session_open);
726 	struct_assign(s10_param, native_param, fl_list.fl_session_close);
727 	struct_assign(s10_param, native_param, fl_list.fl_session_login);
728 	struct_assign(s10_param, native_param, fl_list.fl_session_logout);
729 
730 	struct_assign(s10_param, native_param, fl_list.fl_object_create);
731 	struct_assign(s10_param, native_param, fl_list.fl_object_copy);
732 	struct_assign(s10_param, native_param, fl_list.fl_object_destroy);
733 	struct_assign(s10_param, native_param, fl_list.fl_object_get_size);
734 	struct_assign(s10_param, native_param,
735 	    fl_list.fl_object_get_attribute_value);
736 	struct_assign(s10_param, native_param,
737 	    fl_list.fl_object_set_attribute_value);
738 	struct_assign(s10_param, native_param, fl_list.fl_object_find_init);
739 	struct_assign(s10_param, native_param, fl_list.fl_object_find);
740 	struct_assign(s10_param, native_param, fl_list.fl_object_find_final);
741 
742 	struct_assign(s10_param, native_param, fl_list.fl_key_generate);
743 	struct_assign(s10_param, native_param, fl_list.fl_key_generate_pair);
744 	struct_assign(s10_param, native_param, fl_list.fl_key_wrap);
745 	struct_assign(s10_param, native_param, fl_list.fl_key_unwrap);
746 	struct_assign(s10_param, native_param, fl_list.fl_key_derive);
747 
748 	struct_assign(s10_param, native_param, fl_list.fl_init_token);
749 	struct_assign(s10_param, native_param, fl_list.fl_init_pin);
750 	struct_assign(s10_param, native_param, fl_list.fl_set_pin);
751 
752 	struct_assign(s10_param, native_param, fl_list.prov_is_hash_limited);
753 	struct_assign(s10_param, native_param, fl_list.prov_hash_threshold);
754 	struct_assign(s10_param, native_param, fl_list.prov_hash_limit);
755 
756 	return (s10_uucopy(&s10_param, (void *)arg, sizeof (s10_param)));
757 
758 nonemuioctl:
759 	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
760 }
761 
/*
 * ct_param_t, the parameter structure of the process contract CT_TGET and
 * CT_TSET ioctls, changed between S10 and Nevada.  This is the old S10
 * layout, needed when interposing on the ioctl syscall.
 */
typedef struct s10_ct_param {
	uint32_t	ctpm_id;	/* parameter id */
	uint32_t	ctpm_pad;	/* not referenced by the emulation */
	uint64_t	ctpm_value;	/* parameter value, stored inline */
} s10_ct_param_t;
772 
773 /*
774  * We have to emulate process contract ioctls for init(1M) because the
775  * ioctl parameter structure changed between S10 and Nevada.  This is
776  * a relatively simple process of filling Nevada structure fields,
777  * shuffling values, and initiating a native system call.
778  *
779  * For now, we'll assume that all consumers of CT_TGET and CT_TSET will
780  * need emulation.  We'll issue a stat to make sure that the ioctl
781  * is meant for the contract file system.
782  *
783  */
784 static int
785 ctfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
786 {
787 	int err;
788 	s10_ct_param_t s10param;
789 	ct_param_t param;
790 	struct stat statbuf;
791 
792 	if ((err = __systemcall(rval, SYS_fstatat + 1024,
793 	    fdes, NULL, &statbuf, 0)) != 0)
794 		return (err);
795 	if (strcmp(statbuf.st_fstype, MNTTYPE_CTFS) != 0)
796 		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
797 
798 	if (s10_uucopy((const void *)arg, &s10param, sizeof (s10param)) != 0)
799 		return (EFAULT);
800 	param.ctpm_id = s10param.ctpm_id;
801 	param.ctpm_size = sizeof (uint64_t);
802 	param.ctpm_value = &s10param.ctpm_value;
803 	if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &param))
804 	    != 0)
805 		return (err);
806 
807 	if (cmd == CT_TGET)
808 		return (s10_uucopy(&s10param, (void *)arg, sizeof (s10param)));
809 
810 	return (0);
811 }
812 
813 typedef struct s10_zfs_cmd {
814 	char		zc_name[MAXPATHLEN];
815 	char		zc_value[MAXPATHLEN * 2];
816 	char		zc_string[MAXNAMELEN];
817 	uint64_t	zc_guid;
818 	uint64_t	zc_nvlist_conf;		/* really (char *) */
819 	uint64_t	zc_nvlist_conf_size;
820 	uint64_t	zc_nvlist_src;		/* really (char *) */
821 	uint64_t	zc_nvlist_src_size;
822 	uint64_t	zc_nvlist_dst;		/* really (char *) */
823 	uint64_t	zc_nvlist_dst_size;
824 	uint64_t	zc_cookie;
825 	uint64_t	zc_objset_type;
826 	uint64_t	zc_perm_action;
827 	uint64_t 	zc_history;		/* really (char *) */
828 	uint64_t 	zc_history_len;
829 	uint64_t	zc_history_offset;
830 	uint64_t	zc_obj;
831 	/* Solaris Next added zc_iflags member here */
832 	zfs_share_t	zc_share;
833 	dmu_objset_stats_t zc_objset_stats;
834 	struct drr_begin zc_begin_record;
835 	zinject_record_t zc_inject_record;
836 } s10_zfs_cmd_t;
837 
838 /*
839  * There is a difference in the zfs_cmd_t ioctl parameter between S10 and
840  * Solaris Next so we need to translate between the two structures when
841  * making ZFS ioctls.
842  */
843 static int
844 zfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
845 {
846 	int				err;
847 	s10_zfs_cmd_t			s10_param;
848 	zfs_cmd_t			native_param;
849 	static dev_t			zfs_dev = (dev_t)-1;
850 	struct stat			sbuf;
851 
852 	if (zfs_dev == (dev_t)-1) {
853 		if ((err = __systemcall(rval, SYS_fstatat + 1024,
854 		    AT_FDCWD, "/dev/zfs", &sbuf, 0) != 0) != 0)
855 			goto nonemuioctl;
856 		zfs_dev = major(sbuf.st_rdev);
857 	}
858 	if ((err = __systemcall(rval, SYS_fstatat + 1024,
859 	    fdes, NULL, &sbuf, 0)) != 0)
860 		return (err);
861 	if (major(sbuf.st_rdev) != zfs_dev)
862 		goto nonemuioctl;
863 
864 	if (s10_uucopy((const void *)arg, &s10_param, sizeof (s10_param)) != 0)
865 		return (EFAULT);
866 
867 	bcopy((const void *)s10_param.zc_name, (void *)native_param.zc_name,
868 	    sizeof (s10_param.zc_name));
869 	bcopy((const void *)s10_param.zc_value, (void *)native_param.zc_value,
870 	    sizeof (s10_param.zc_value));
871 	bcopy((const void *)s10_param.zc_string, (void *)native_param.zc_string,
872 	    sizeof (s10_param.zc_string));
873 	struct_assign(native_param, s10_param, zc_guid);
874 	struct_assign(native_param, s10_param, zc_nvlist_conf);
875 	struct_assign(native_param, s10_param, zc_nvlist_conf_size);
876 	struct_assign(native_param, s10_param, zc_nvlist_src);
877 	struct_assign(native_param, s10_param, zc_nvlist_src_size);
878 	struct_assign(native_param, s10_param, zc_nvlist_dst);
879 	struct_assign(native_param, s10_param, zc_nvlist_dst_size);
880 	struct_assign(native_param, s10_param, zc_cookie);
881 	struct_assign(native_param, s10_param, zc_objset_type);
882 	struct_assign(native_param, s10_param, zc_perm_action);
883 	struct_assign(native_param, s10_param, zc_history);
884 	struct_assign(native_param, s10_param, zc_history_len);
885 	struct_assign(native_param, s10_param, zc_history_offset);
886 	struct_assign(native_param, s10_param, zc_obj);
887 	native_param.zc_iflags = 0;
888 	struct_assign(native_param, s10_param, zc_share);
889 	struct_assign(native_param, s10_param, zc_objset_stats);
890 	struct_assign(native_param, s10_param, zc_begin_record);
891 	struct_assign(native_param, s10_param, zc_inject_record);
892 
893 	err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &native_param);
894 
895 	bcopy((const void *)native_param.zc_name, (void *)s10_param.zc_name,
896 	    sizeof (s10_param.zc_name));
897 	bcopy((const void *)native_param.zc_value, (void *)s10_param.zc_value,
898 	    sizeof (s10_param.zc_value));
899 	bcopy((const void *)native_param.zc_string, (void *)s10_param.zc_string,
900 	    sizeof (s10_param.zc_string));
901 	struct_assign(s10_param, native_param, zc_guid);
902 	struct_assign(s10_param, native_param, zc_nvlist_conf);
903 	struct_assign(s10_param, native_param, zc_nvlist_conf_size);
904 	struct_assign(s10_param, native_param, zc_nvlist_src);
905 	struct_assign(s10_param, native_param, zc_nvlist_src_size);
906 	struct_assign(s10_param, native_param, zc_nvlist_dst);
907 	struct_assign(s10_param, native_param, zc_nvlist_dst_size);
908 	struct_assign(s10_param, native_param, zc_cookie);
909 	struct_assign(s10_param, native_param, zc_objset_type);
910 	struct_assign(s10_param, native_param, zc_perm_action);
911 	struct_assign(s10_param, native_param, zc_history);
912 	struct_assign(s10_param, native_param, zc_history_len);
913 	struct_assign(s10_param, native_param, zc_history_offset);
914 	struct_assign(s10_param, native_param, zc_obj);
915 	struct_assign(s10_param, native_param, zc_share);
916 	struct_assign(s10_param, native_param, zc_objset_stats);
917 	struct_assign(s10_param, native_param, zc_begin_record);
918 	struct_assign(s10_param, native_param, zc_inject_record);
919 
920 	(void) s10_uucopy(&s10_param, (void *)arg, sizeof (s10_param));
921 	return (err);
922 
923 nonemuioctl:
924 	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
925 }
926 
927 int
928 s10_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
929 {
930 	switch (cmd) {
931 	case CRYPTO_GET_FUNCTION_LIST:
932 		return (crypto_ioctl(rval, fdes, cmd, arg));
933 	case CT_TGET:
934 		/*FALLTHRU*/
935 	case CT_TSET:
936 		return (ctfs_ioctl(rval, fdes, cmd, arg));
937 	case MNTIOC_GETMNTENT:
938 		/*FALLTHRU*/
939 	case MNTIOC_GETEXTMNTENT:
940 		/*FALLTHRU*/
941 	case MNTIOC_GETMNTANY:
942 		return (mntfs_ioctl(rval, fdes, cmd, arg));
943 	}
944 
945 	if ((cmd & 0xff00) == ZFS_IOC)
946 		return (zfs_ioctl(rval, fdes, cmd, arg));
947 
948 	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
949 }
950 
951 /*
952  * Unfortunately, pwrite()'s behavior differs between S10 and Nevada when
953  * applied to files opened with O_APPEND.  The offset argument is ignored and
954  * the buffer is appended to the target file in S10, whereas the current file
955  * position is ignored in Nevada (i.e., pwrite() acts as though the target file
956  * wasn't opened with O_APPEND).  This is a result of the fix for CR 6655660
957  * (pwrite() must ignore the O_APPEND/FAPPEND flag).
958  *
959  * We emulate the old S10 pwrite() behavior by checking whether the target file
960  * was opened with O_APPEND.  If it was, then invoke the write() system call
961  * instead of pwrite(); otherwise, invoke the pwrite() system call as usual.
962  */
963 static int
964 s10_pwrite(sysret_t *rval, int fd, const void *bufferp, size_t num_bytes,
965     off_t offset)
966 {
967 	int err;
968 
969 	if ((err = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL)) != 0)
970 		return (err);
971 	if (rval->sys_rval1 & O_APPEND)
972 		return (__systemcall(rval, SYS_write + 1024, fd, bufferp,
973 		    num_bytes));
974 	return (__systemcall(rval, SYS_pwrite + 1024, fd, bufferp, num_bytes,
975 	    offset));
976 }
977 
#if !defined(_LP64)
/*
 * Large file flavor of pwrite() for 32-bit processes.  The 64-bit offset
 * arrives as two 32-bit halves that are forwarded unchanged.  The O_APPEND
 * handling is the same as in s10_pwrite(): when the descriptor was opened
 * with O_APPEND, write() is issued instead of pwrite64().
 */
static int
s10_pwrite64(sysret_t *rval, int fd, const void *bufferp, size32_t num_bytes,
    uint32_t offset_1, uint32_t offset_2)
{
	int error;

	error = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL);
	if (error != 0)
		return (error);

	/* F_GETFL leaves the file status flags in sys_rval1. */
	if ((rval->sys_rval1 & O_APPEND) == 0)
		return (__systemcall(rval, SYS_pwrite64 + 1024, fd, bufferp,
		    num_bytes, offset_1, offset_2));

	return (__systemcall(rval, SYS_write + 1024, fd, bufferp, num_bytes));
}
#endif	/* !_LP64 */
999 
/*
 * Field accessors private to s10_getdents_common().  Each takes a character
 * pointer that addresses the start of a dirent or dirent64 record and
 * yields that record's d_name or d_reclen field, respectively.  Both expand
 * to expressions that use the name_offset and reclen_offset variables of
 * the enclosing scope, so they shouldn't be used outside of
 * s10_getdents_common().
 */
#define	dirent_name(entp)	((entp) + name_offset)
#define	dirent_reclen(entp)	\
	(*(unsigned short *)(uintptr_t)((entp) + reclen_offset))
1009 
1010 /*
1011  * This function contains code that is common to both s10_getdents() and
1012  * s10_getdents64().  See the comment above s10_getdents() for details.
1013  *
1014  * rval, fd, buf, and nbyte should be passed unmodified from s10_getdents()
1015  * and s10_getdents64().  getdents_syscall_id should be either SYS_getdents
1016  * or SYS_getdents64.  name_offset should be the the byte offset of
1017  * the d_name field in the dirent structures passed to the kernel via the
1018  * syscall represented by getdents_syscall_id.  reclen_offset should be
1019  * the byte offset of the d_reclen field in the aforementioned dirent
1020  * structures.
1021  */
1022 static int
1023 s10_getdents_common(sysret_t *rval, int fd, char *buf, size_t nbyte,
1024     int getdents_syscall_id, size_t name_offset, size_t reclen_offset)
1025 {
1026 	int err;
1027 	size_t buf_size;
1028 	char *local_buf;
1029 	char *buf_current;
1030 
1031 	/*
1032 	 * Use a special brand operation, B_S10_ISFDXATTRDIR, to determine
1033 	 * whether the specified file descriptor refers to an extended file
1034 	 * attribute directory.  If it doesn't, then SYS_getdents won't
1035 	 * reveal extended file attributes, in which case we can simply
1036 	 * hand the syscall to the native kernel.
1037 	 */
1038 	if ((err = __systemcall(rval, SYS_brand + 1024, B_S10_ISFDXATTRDIR,
1039 	    fd)) != 0)
1040 		return (err);
1041 	if (rval->sys_rval1 == 0)
1042 		return (__systemcall(rval, getdents_syscall_id + 1024, fd, buf,
1043 		    nbyte));
1044 
1045 	/*
1046 	 * The file descriptor refers to an extended file attributes directory.
1047 	 * We need to create a dirent buffer that's as large as buf into which
1048 	 * the native SYS_getdents will store the special extended file
1049 	 * attribute directory's entries.  We can't dereference buf because
1050 	 * it might be an invalid pointer!
1051 	 */
1052 	if (nbyte > MAXGETDENTS_SIZE)
1053 		nbyte = MAXGETDENTS_SIZE;
1054 	local_buf = (char *)malloc(nbyte);
1055 	if (local_buf == NULL) {
1056 		/*
1057 		 * getdents(2) doesn't return an error code indicating a memory
1058 		 * allocation error and it doesn't make sense to return any of
1059 		 * its documented error codes for a malloc(3C) failure.  We'll
1060 		 * use ENOMEM even though getdents(2) doesn't use it because it
1061 		 * best describes the failure.
1062 		 */
1063 		(void) S10_TRUSS_POINT_3(rval, getdents_syscall_id, ENOMEM, fd,
1064 		    buf, nbyte);
1065 		rval->sys_rval1 = -1;
1066 		rval->sys_rval2 = 0;
1067 		return (EIO);
1068 	}
1069 
1070 	/*
1071 	 * Issue a native SYS_getdents syscall but use our local dirent buffer
1072 	 * instead of buf.  This will allow us to examine the returned dirent
1073 	 * structures immediately and copy them to buf later.  That way the
1074 	 * calling process won't be able to see the dirent structures until
1075 	 * we finish examining them.
1076 	 */
1077 	if ((err = __systemcall(rval, getdents_syscall_id + 1024, fd, local_buf,
1078 	    nbyte)) != 0) {
1079 		free(local_buf);
1080 		return (err);
1081 	}
1082 	buf_size = rval->sys_rval1;
1083 	if (buf_size == 0) {
1084 		free(local_buf);
1085 		return (0);
1086 	}
1087 
1088 	/*
1089 	 * Look for SUNWattr_ro (VIEW_READONLY) and SUNWattr_rw
1090 	 * (VIEW_READWRITE) in the directory entries and remove them
1091 	 * from the dirent buffer.
1092 	 */
1093 	for (buf_current = local_buf;
1094 	    (size_t)(buf_current - local_buf) < buf_size; /* cstyle */) {
1095 		if (strcmp(dirent_name(buf_current), VIEW_READONLY) != 0 &&
1096 		    strcmp(dirent_name(buf_current), VIEW_READWRITE) != 0) {
1097 			/*
1098 			 * The dirent refers to an attribute that should
1099 			 * be visible to Solaris 10 processes.  Keep it
1100 			 * and examine the next entry in the buffer.
1101 			 */
1102 			buf_current += dirent_reclen(buf_current);
1103 		} else {
1104 			/*
1105 			 * We found either SUNWattr_ro (VIEW_READONLY)
1106 			 * or SUNWattr_rw (VIEW_READWRITE).  Remove it
1107 			 * from the dirent buffer by decrementing
1108 			 * buf_size by the size of the entry and
1109 			 * overwriting the entry with the remaining
1110 			 * entries.
1111 			 */
1112 			buf_size -= dirent_reclen(buf_current);
1113 			(void) memmove(buf_current, buf_current +
1114 			    dirent_reclen(buf_current), buf_size -
1115 			    (size_t)(buf_current - local_buf));
1116 		}
1117 	}
1118 
1119 	/*
1120 	 * Copy local_buf into buf so that the calling process can see
1121 	 * the results.
1122 	 */
1123 	if ((err = s10_uucopy(local_buf, buf, buf_size)) != 0) {
1124 		free(local_buf);
1125 		rval->sys_rval1 = -1;
1126 		rval->sys_rval2 = 0;
1127 		return (err);
1128 	}
1129 	rval->sys_rval1 = buf_size;
1130 	free(local_buf);
1131 	return (0);
1132 }
1133 
1134 /*
1135  * Solaris Next added two special extended file attributes, SUNWattr_ro and
1136  * SUNWattr_rw, which are called "extended system attributes".  They have
1137  * special semantics (e.g., a process cannot unlink SUNWattr_ro) and should
1138  * not appear in solaris10-branded zones because no Solaris 10 applications,
1139  * including system commands such as tar(1), are coded to correctly handle these
1140  * special attributes.
1141  *
1142  * This emulation function solves the aforementioned problem by emulating
1143  * the getdents(2) syscall and filtering both system attributes out of resulting
1144  * directory entry lists.  The emulation function only filters results when
1145  * the given file descriptor refers to an extended file attribute directory.
1146  * Filtering getdents(2) results is expensive because it requires dynamic
1147  * memory allocation; however, the performance cost is tolerable because
1148  * we don't expect Solaris 10 processes to frequently examine extended file
1149  * attribute directories.
1150  *
1151  * The brand's emulation library needs two getdents(2) emulation functions
1152  * because getdents(2) comes in two flavors: non-largefile-aware getdents(2)
1153  * and largefile-aware getdents64(2).  s10_getdents() handles the non-largefile-
1154  * aware case for 32-bit processes and all getdents(2) syscalls for 64-bit
1155  * processes (64-bit processes use largefile-aware interfaces by default).
1156  * See s10_getdents64() below for the largefile-aware getdents64(2) emulation
1157  * function for 32-bit processes.
1158  */
1159 static int
1160 s10_getdents(sysret_t *rval, int fd, struct dirent *buf, size_t nbyte)
1161 {
1162 	return (s10_getdents_common(rval, fd, (char *)buf, nbyte, SYS_getdents,
1163 	    offsetof(struct dirent, d_name),
1164 	    offsetof(struct dirent, d_reclen)));
1165 }
1166 
#ifndef	_LP64
/*
 * Largefile-aware getdents(2) emulation for 32-bit processes.  It differs
 * from s10_getdents() only in the syscall that is issued and in using the
 * field offsets of struct dirent64.  See the comment above s10_getdents()
 * for why the emulation exists.
 */
static int
s10_getdents64(sysret_t *rval, int fd, struct dirent64 *buf, size_t nbyte)
{
	const size_t name_off = offsetof(struct dirent64, d_name);
	const size_t reclen_off = offsetof(struct dirent64, d_reclen);

	return (s10_getdents_common(rval, fd, (char *)buf, nbyte,
	    SYS_getdents64, name_off, reclen_off));
}
#endif	/* !_LP64 */
1181 
/*
 * Solaris 10 encoding of acctctl(2) commands: the mode lives in the top
 * four bits of the command and the option in the remaining 28 bits.
 */
#define	S10_AC_PROC		0x10000000
#define	S10_AC_TASK		0x20000000
#define	S10_AC_FLOW		0x40000000
#define	S10_AC_MODE(cmd)	((cmd) & 0xf0000000)
#define	S10_AC_OPTION(cmd)	((cmd) & 0x0fffffff)
1187 
1188 /*
1189  * The mode shift, mode mask and option mask for acctctl have changed.  The
1190  * mode is currently the top full byte and the option is the lower 3 full bytes.
1191  */
1192 int
1193 s10_acctctl(sysret_t *rval, int cmd, void *buf, size_t bufsz)
1194 {
1195 	int mode = S10_AC_MODE(cmd);
1196 	int option = S10_AC_OPTION(cmd);
1197 
1198 	switch (mode) {
1199 	case S10_AC_PROC:
1200 		mode = AC_PROC;
1201 		break;
1202 	case S10_AC_TASK:
1203 		mode = AC_TASK;
1204 		break;
1205 	case S10_AC_FLOW:
1206 		mode = AC_FLOW;
1207 		break;
1208 	default:
1209 		return (S10_TRUSS_POINT_3(rval, SYS_acctctl, EINVAL, cmd, buf,
1210 		    bufsz));
1211 	}
1212 
1213 	return (__systemcall(rval, SYS_acctctl + 1024, mode | option, buf,
1214 	    bufsz));
1215 }
1216 
1217 /*
1218  * The Audit Policy parameters have changed due to:
1219  *    6466722 audituser and AUDIT_USER are defined, unused, undocumented and
1220  *            should be removed.
1221  *
1222  * In S10 we had the following flag:
1223  *	#define AUDIT_USER 0x0040
1224  * which doesn't exist in Solaris Next where the subsequent flags are shifted
1225  * down.  For example, in S10 we had:
1226  *	#define AUDIT_GROUP     0x0080
1227  * but on Solaris Next we have:
1228  *	#define AUDIT_GROUP     0x0040
1229  * AUDIT_GROUP has the value AUDIT_USER had in S10 and all of the subsequent
1230  * bits are also shifted one place.
1231  *
1232  * When we're getting or setting the Audit Policy parameters we need to
1233  * shift the outgoing or incoming bits into their proper positions.  Since
1234  * S10_AUDIT_USER was always unused, we always clear that bit on A_GETPOLICY.
1235  *
1236  * The command we care about, BSM_AUDITCTL, passes the most parameters (3),
1237  * so declare this function to take up to 4 args and just pass them on.
1238  * The number of parameters for s10_auditsys needs to be equal to the BSM_*
1239  * subcommand that has the most parameters, since we want to pass all
1240  * parameters through, regardless of which subcommands we interpose on.
1241  *
1242  * Note that the auditsys system call uses the SYSENT_AP macro wrapper instead
1243  * of the more common SYSENT_CI macro.  This means the return value is a
1244  * SE_64RVAL so the syscall table uses RV_64RVAL.
1245  */
1246 
1247 #define	S10_AUDIT_HMASK	0xffffffc0
1248 #define	S10_AUDIT_LMASK	0x3f
1249 #define	S10_AUC_NOSPACE	0x3
1250 
1251 int
1252 s10_auditsys(sysret_t *rval, int bsmcmd, intptr_t a0, intptr_t a1, intptr_t a2)
1253 {
1254 	int	    err;
1255 	uint32_t    m;
1256 
1257 	if (bsmcmd != BSM_AUDITCTL)
1258 		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, a1,
1259 		    a2));
1260 
1261 	if ((int)a0 == A_GETPOLICY) {
1262 		if ((err = __systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0,
1263 		    &m, a2)) != 0)
1264 			return (err);
1265 		m = ((m & S10_AUDIT_HMASK) << 1) | (m & S10_AUDIT_LMASK);
1266 		if (s10_uucopy(&m, (void *)a1, sizeof (m)) != 0)
1267 			return (EFAULT);
1268 		return (0);
1269 
1270 	} else if ((int)a0 == A_SETPOLICY) {
1271 		if (s10_uucopy((const void *)a1, &m, sizeof (m)) != 0)
1272 			return (EFAULT);
1273 		m = ((m >> 1) & S10_AUDIT_HMASK) | (m & S10_AUDIT_LMASK);
1274 		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, &m,
1275 		    a2));
1276 	} else if ((int)a0 == A_GETCOND) {
1277 		if ((err = __systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0,
1278 		    &m, a2)) != 0)
1279 			return (err);
1280 		if (m == AUC_NOSPACE)
1281 			m = S10_AUC_NOSPACE;
1282 		if (s10_uucopy(&m, (void *)a1, sizeof (m)) != 0)
1283 			return (EFAULT);
1284 		return (0);
1285 	} else if ((int)a0 == A_SETCOND) {
1286 		if (s10_uucopy((const void *)a1, &m, sizeof (m)) != 0)
1287 			return (EFAULT);
1288 		if (m == S10_AUC_NOSPACE)
1289 			m = AUC_NOSPACE;
1290 		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, &m,
1291 		    a2));
1292 	}
1293 
1294 	return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, a1, a2));
1295 }
1296 
1297 /*
1298  * Determine whether the executable passed to SYS_exec or SYS_execve is a
1299  * native executable.  The s10_npreload.so invokes the B_S10_NATIVE brand
1300  * operation which patches up the processes exec info to eliminate any trace
1301  * of the wrapper.  That will make pgrep and other commands that examine
1302  * process' executable names and command-line parameters work properly.
1303  */
1304 static int
1305 s10_exec_native(sysret_t *rval, const char *fname, const char **argp,
1306     const char **envp)
1307 {
1308 	const char *filename = fname;
1309 	char path[64];
1310 	int err;
1311 
1312 	/* Get a copy of the executable we're trying to run */
1313 	path[0] = '\0';
1314 	(void) s10_uucopystr(filename, path, sizeof (path));
1315 
1316 	/* Check if we're trying to run a native binary */
1317 	if (strncmp(path, "/.SUNWnative/usr/lib/brand/solaris10/s10_native",
1318 	    sizeof (path)) != 0)
1319 		return (0);
1320 
1321 	/* Skip the first element in the argv array */
1322 	argp++;
1323 
1324 	/*
1325 	 * The the path of the dynamic linker is the second parameter
1326 	 * of s10_native_exec().
1327 	 */
1328 	if (s10_uucopy(argp, &filename, sizeof (char *)) != 0)
1329 		return (EFAULT);
1330 
1331 	/* If an exec call succeeds, it never returns */
1332 	err = __systemcall(rval, SYS_brand + 1024, B_EXEC_NATIVE, filename,
1333 	    argp, envp, NULL, NULL, NULL);
1334 	s10_assert(err != 0);
1335 	return (err);
1336 }
1337 
1338 /*
1339  * Interpose on the SYS_exec syscall to detect native wrappers.
1340  */
1341 int
1342 s10_exec(sysret_t *rval, const char *fname, const char **argp)
1343 {
1344 	int err;
1345 
1346 	if ((err = s10_exec_native(rval, fname, argp, NULL)) != 0)
1347 		return (err);
1348 
1349 	/* If an exec call succeeds, it never returns */
1350 	err = __systemcall(rval, SYS_execve + 1024, fname, argp, NULL);
1351 	s10_assert(err != 0);
1352 	return (err);
1353 }
1354 
1355 /*
1356  * Interpose on the SYS_execve syscall to detect native wrappers.
1357  */
1358 int
1359 s10_execve(sysret_t *rval, const char *fname, const char **argp,
1360     const char **envp)
1361 {
1362 	int err;
1363 
1364 	if ((err = s10_exec_native(rval, fname, argp, envp)) != 0)
1365 		return (err);
1366 
1367 	/* If an exec call succeeds, it never returns */
1368 	err = __systemcall(rval, SYS_execve + 1024, fname, argp, envp);
1369 	s10_assert(err != 0);
1370 	return (err);
1371 }
1372 
1373 /*
1374  * S10's issetugid() syscall is now a subcode to privsys().
1375  */
1376 static int
1377 s10_issetugid(sysret_t *rval)
1378 {
1379 	return (__systemcall(rval, SYS_privsys + 1024, PRIVSYS_ISSETUGID,
1380 	    0, 0, 0, 0, 0));
1381 }
1382 
1383 static long
1384 s10_uname(sysret_t *rv, uintptr_t p1)
1385 {
1386 	struct utsname un, *unp = (struct utsname *)p1;
1387 	int rev, err;
1388 
1389 	if ((err = __systemcall(rv, SYS_uname + 1024, &un)) != 0)
1390 		return (err);
1391 
1392 	rev = atoi(&un.release[2]);
1393 	s10_assert(rev >= 11);
1394 	bzero(un.release, _SYS_NMLN);
1395 	(void) strlcpy(un.release, S10_UTS_RELEASE, _SYS_NMLN);
1396 	bzero(un.version, _SYS_NMLN);
1397 	(void) strlcpy(un.version, S10_UTS_VERSION, _SYS_NMLN);
1398 
1399 	/* copy out the modified uname info */
1400 	return (s10_uucopy(&un, unp, sizeof (un)));
1401 }
1402 
1403 int
1404 s10_sysconfig(sysret_t *rv, int which)
1405 {
1406 	long value;
1407 
1408 	/*
1409 	 * We must interpose on the sysconfig(2) requests
1410 	 * that deal with the realtime signal number range.
1411 	 * All others get passed to the native sysconfig(2).
1412 	 */
1413 	switch (which) {
1414 	case _CONFIG_RTSIG_MAX:
1415 		value = S10_SIGRTMAX - S10_SIGRTMIN + 1;
1416 		break;
1417 	case _CONFIG_SIGRT_MIN:
1418 		value = S10_SIGRTMIN;
1419 		break;
1420 	case _CONFIG_SIGRT_MAX:
1421 		value = S10_SIGRTMAX;
1422 		break;
1423 	default:
1424 		return (__systemcall(rv, SYS_sysconfig + 1024, which));
1425 	}
1426 
1427 	(void) S10_TRUSS_POINT_1(rv, SYS_sysconfig, 0, which);
1428 	rv->sys_rval1 = value;
1429 	rv->sys_rval2 = 0;
1430 
1431 	return (0);
1432 }
1433 
1434 int
1435 s10_sysinfo(sysret_t *rv, int command, char *buf, long count)
1436 {
1437 	char *value;
1438 	int len;
1439 
1440 	/*
1441 	 * We must interpose on the sysinfo(2) commands SI_RELEASE and
1442 	 * SI_VERSION; all others get passed to the native sysinfo(2)
1443 	 * command.
1444 	 */
1445 	switch (command) {
1446 		case SI_RELEASE:
1447 			value = S10_UTS_RELEASE;
1448 			break;
1449 
1450 		case SI_VERSION:
1451 			value = S10_UTS_VERSION;
1452 			break;
1453 
1454 		default:
1455 			/*
1456 			 * The default action is to pass the command to the
1457 			 * native sysinfo(2) syscall.
1458 			 */
1459 			return (__systemcall(rv, SYS_systeminfo + 1024,
1460 			    command, buf, count));
1461 	}
1462 
1463 	len = strlen(value) + 1;
1464 	if (count > 0) {
1465 		if (s10_uucopystr(value, buf, count) != 0)
1466 			return (EFAULT);
1467 
1468 		/* Assure NULL termination of buf as s10_uucopystr() doesn't. */
1469 		if (len > count && s10_uucopy("\0", buf + (count - 1), 1) != 0)
1470 			return (EFAULT);
1471 	}
1472 
1473 	/*
1474 	 * On success, sysinfo(2) returns the size of buffer required to hold
1475 	 * the complete value plus its terminating NULL byte.
1476 	 */
1477 	(void) S10_TRUSS_POINT_3(rv, SYS_systeminfo, 0, command, buf, count);
1478 	rv->sys_rval1 = len;
1479 	rv->sys_rval2 = 0;
1480 	return (0);
1481 }
1482 
1483 #if defined(__x86)
1484 #if defined(__amd64)
/*
 * 64-bit x86 LWPs created by SYS_lwp_create start here if they need to set
 * their %fs registers to the legacy Solaris 10 selector value.
 *
 * This function does three things:
 *
 *	1.  Trap to the kernel so that it can set %fs to the legacy Solaris 10
 *	    selector value.
 *	2.  Read the LWP's true entry point (the entry point supplied by libc
 *	    when SYS_lwp_create was invoked) from %r14.
 *	3.  Eliminate this function's stack frame and pass control to the LWP's
 *	    true entry point.
 *
 * ulwp_structp is the argument with which the LWP was started; it is handed
 * on to the true entry point as its first argument.  This function never
 * returns to its caller.
 *
 * See the comment above s10_lwp_create_correct_fs() (see below) for the reason
 * why this function exists.
 */
/*ARGSUSED*/
static void
s10_lwp_create_entry_point(void *ulwp_structp)
{
	/* Receives the brand operation's return values, which are ignored. */
	sysret_t rval;

	/*
	 * The new LWP's %fs register is initially zero, but libc won't
	 * function correctly when %fs is zero.  Change the LWP's %fs register
	 * via SYS_brand.
	 */
	(void) __systemcall(&rval, SYS_brand + 1024, B_S10_FSREGCORRECTION);

	/*
	 * Jump to the true entry point, which is stored in %r14.
	 * Remove our stack frame before jumping so that
	 * s10_lwp_create_entry_point() won't be seen in stack traces.
	 *
	 * NOTE: s10_lwp_create_entry_point() pushes %r12 onto its stack frame
	 * so that it can use it as a temporary register.  We don't restore %r12
	 * in this assembly block because we don't care about its value (and
	 * neither does _lwp_start()).  Besides, the System V ABI AMD64
	 * Architecture Processor Supplement doesn't specify that %r12 should
	 * have a special value when LWPs start, so we can ignore its value when
	 * we jump to the true entry point.  Furthermore, %r12 is a callee-saved
	 * register, so the true entry point should push %r12 onto its stack
	 * before using the register.  We ignore %r14 after we read it for
	 * similar reasons.
	 *
	 * NOTE: The compiler will generate a function epilogue for this
	 * function despite the fact that the LWP will never execute it.
	 * We could hand-code this entire function in assembly to eliminate
	 * the epilogue, but the epilogue is only three or four instructions,
	 * so we wouldn't save much space.  Besides, why would we want
	 * to create yet another ugly, hard-to-maintain assembly function when
	 * we could write most of it in C?
	 *
	 * NOTE: The frame teardown below assumes that this function was
	 * compiled with %rbp as its frame pointer.
	 */
	__asm__ __volatile__(
	    "movq %0, %%rdi\n\t"	/* pass ulwp_structp as arg1 */
	    "movq %%rbp, %%rsp\n\t"	/* eliminate the stack frame */
	    "popq %%rbp\n\t"		/* restore the caller's frame pointer */
	    "jmp *%%r14\n\t"		/* jump to the true entry point */
	    : : "r" (ulwp_structp));
	/*NOTREACHED*/
}
1546 
1547 /*
1548  * The S10 libc expects that %fs will be nonzero for new 64-bit x86 LWPs but the
1549  * Nevada kernel clears %fs for such LWPs.  Unforunately, new LWPs do not issue
1550  * SYS_lwp_private (see s10_lwp_private() below) after they are created, so
1551  * we must ensure that new LWPs invoke a brand operation that sets %fs to a
1552  * nonzero value immediately after their creation.
1553  *
1554  * The easiest way to do this is to make new LWPs start at a special function,
1555  * s10_lwp_create_entry_point() (see its definition above), that invokes the
1556  * brand operation that corrects %fs.  We'll store the entry points of new LWPs
1557  * in their %r14 registers so that s10_lwp_create_entry_point() can find and
1558  * call them after invoking the special brand operation.  %r14 is a callee-saved
1559  * register; therefore, any functions invoked by s10_lwp_create_entry_point()
1560  * and all functions dealing with signals (e.g., sigacthandler()) will preserve
1561  * %r14 for s10_lwp_create_entry_point().
1562  *
1563  * The Nevada kernel can safely work with nonzero %fs values because the kernel
1564  * configures per-thread %fs segment descriptors so that the legacy %fs selector
1565  * value will still work.  See the comment in lwp_load() regarding %fs and
1566  * %fsbase in 64-bit x86 processes.
1567  *
1568  * This emulation exists thanks to CRs 6467491 and 6501650.
1569  */
1570 static int
1571 s10_lwp_create_correct_fs(sysret_t *rval, ucontext_t *ucp, int flags,
1572     id_t *new_lwp)
1573 {
1574 	ucontext_t s10_uc;
1575 
1576 	/*
1577 	 * Copy the supplied ucontext_t structure to the local stack
1578 	 * frame and store the new LWP's entry point (the value of %rip
1579 	 * stored in the ucontext_t) in the new LWP's %r14 register.
1580 	 * Then make s10_lwp_create_entry_point() the new LWP's entry
1581 	 * point.
1582 	 */
1583 	if (s10_uucopy(ucp, &s10_uc, sizeof (s10_uc)) != 0)
1584 		return (EFAULT);
1585 
1586 	s10_uc.uc_mcontext.gregs[REG_R14] = s10_uc.uc_mcontext.gregs[REG_RIP];
1587 	s10_uc.uc_mcontext.gregs[REG_RIP] = (greg_t)s10_lwp_create_entry_point;
1588 
1589 	/*  fix up the signal mask */
1590 	if (s10_uc.uc_flags & UC_SIGMASK)
1591 		(void) s10sigset_to_native(&s10_uc.uc_sigmask,
1592 		    &s10_uc.uc_sigmask);
1593 
1594 	/*
1595 	 * Issue SYS_lwp_create to create the new LWP.  We pass the
1596 	 * modified ucontext_t to make sure that the new LWP starts at
1597 	 * s10_lwp_create_entry_point().
1598 	 */
1599 	return (__systemcall(rval, SYS_lwp_create + 1024, &s10_uc,
1600 	    flags, new_lwp));
1601 }
1602 #endif	/* __amd64 */
1603 
1604 /*
1605  * SYS_lwp_private is issued by libc_init() to set %fsbase in 64-bit x86
1606  * processes.  The Nevada kernel sets %fs to zero but the S10 libc expects
1607  * %fs to be nonzero.  We'll pass the issued system call to the kernel untouched
1608  * and invoke a brand operation to set %fs to the legacy S10 selector value.
1609  *
1610  * This emulation exists thanks to CRs 6467491 and 6501650.
1611  */
1612 static int
1613 s10_lwp_private(sysret_t *rval, int cmd, int which, uintptr_t base)
1614 {
1615 #if defined(__amd64)
1616 	int err;
1617 
1618 	/*
1619 	 * The current LWP's %fs register should be zero.  Determine whether the
1620 	 * Solaris 10 libc with which we're working functions correctly when %fs
1621 	 * is zero by calling thr_main() after issuing the SYS_lwp_private
1622 	 * syscall.  If thr_main() barfs (returns -1), then change the LWP's %fs
1623 	 * register via SYS_brand and patch s10_sysent_table so that issuing
1624 	 * SYS_lwp_create executes s10_lwp_create_correct_fs() rather than the
1625 	 * default s10_lwp_create().  s10_lwp_create_correct_fs() will
1626 	 * guarantee that new LWPs will have correct %fs values.
1627 	 */
1628 	if ((err = __systemcall(rval, SYS_lwp_private + 1024, cmd, which,
1629 	    base)) != 0)
1630 		return (err);
1631 	if (thr_main() == -1) {
1632 		/*
1633 		 * SYS_lwp_private is only issued by libc_init(), which is
1634 		 * executed when libc is first loaded by ld.so.1.  Thus we
1635 		 * are guaranteed to be single-threaded at this point.  Even
1636 		 * if we were multithreaded at this point, writing a 64-bit
1637 		 * value to the st_callc field of a s10_sysent_table
1638 		 * entry is guaranteed to be atomic on 64-bit x86 chips
1639 		 * as long as the field is not split across cache lines
1640 		 * (It shouldn't be.).  See chapter 8, section 1.1 of
1641 		 * "The Intel 64 and IA32 Architectures Software Developer's
1642 		 * Manual," Volume 3A for more details.
1643 		 */
1644 		s10_sysent_table[SYS_lwp_create].st_callc =
1645 		    (sysent_cb_t)s10_lwp_create_correct_fs;
1646 		return (__systemcall(rval, SYS_brand + 1024,
1647 		    B_S10_FSREGCORRECTION));
1648 	}
1649 	return (0);
1650 #else	/* !__amd64 */
1651 	return (__systemcall(rval, SYS_lwp_private + 1024, cmd, which, base));
1652 #endif	/* !__amd64 */
1653 }
1654 #endif	/* __x86 */
1655 
1656 /*
1657  * The OpenSolaris versions of lwp_mutex_timedlock() and lwp_mutex_trylock()
1658  * add an extra argument to the interfaces, a uintptr_t value for the mutex's
1659  * mutex_owner field.  The Solaris 10 libc assigns the mutex_owner field at
1660  * user-level, so we just make the extra argument be zero in both syscalls.
1661  */
1662 
1663 static int
1664 s10_lwp_mutex_timedlock(sysret_t *rval, lwp_mutex_t *lp, timespec_t *tsp)
1665 {
1666 	return (__systemcall(rval, SYS_lwp_mutex_timedlock + 1024, lp, tsp, 0));
1667 }
1668 
1669 static int
1670 s10_lwp_mutex_trylock(sysret_t *rval, lwp_mutex_t *lp)
1671 {
1672 	return (__systemcall(rval, SYS_lwp_mutex_trylock + 1024, lp, 0));
1673 }
1674 
1675 /*
1676  * If the emul_global_zone flag is set then emulate some aspects of the
1677  * zone system call.  In particular, emulate the global zone ID on the
1678  * ZONE_LOOKUP subcommand and emulate some of the global zone attributes
1679  * on the ZONE_GETATTR subcommand.  If the flag is not set or we're performing
1680  * some other operation, simply pass the calls through.
1681  */
1682 int
1683 s10_zone(sysret_t *rval, int cmd, void *arg1, void *arg2, void *arg3,
1684     void *arg4)
1685 {
1686 	char		*aval;
1687 	int		len;
1688 	zoneid_t	zid;
1689 	int		attr;
1690 	char		*buf;
1691 	size_t		bufsize;
1692 
1693 	/*
1694 	 * We only emulate the zone syscall for a subset of specific commands,
1695 	 * otherwise we just pass the call through.
1696 	 */
1697 	if (!emul_global_zone)
1698 		return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2,
1699 		    arg3, arg4));
1700 
1701 	switch (cmd) {
1702 	case ZONE_LOOKUP:
1703 		(void) S10_TRUSS_POINT_1(rval, SYS_zone, 0, cmd);
1704 		rval->sys_rval1 = GLOBAL_ZONEID;
1705 		rval->sys_rval2 = 0;
1706 		return (0);
1707 
1708 	case ZONE_GETATTR:
1709 		zid = (zoneid_t)(uintptr_t)arg1;
1710 		attr = (int)(uintptr_t)arg2;
1711 		buf = (char *)arg3;
1712 		bufsize = (size_t)arg4;
1713 
1714 		/*
1715 		 * If the request is for the global zone then we're emulating
1716 		 * that, otherwise pass this thru.
1717 		 */
1718 		if (zid != GLOBAL_ZONEID)
1719 			goto passthru;
1720 
1721 		switch (attr) {
1722 		case ZONE_ATTR_NAME:
1723 			aval = GLOBAL_ZONENAME;
1724 			break;
1725 
1726 		case ZONE_ATTR_BRAND:
1727 			aval = NATIVE_BRAND_NAME;
1728 			break;
1729 		default:
1730 			/*
1731 			 * We only emulate a subset of the attrs, use the
1732 			 * real zone id to pass thru the rest.
1733 			 */
1734 			arg1 = (void *)(uintptr_t)zoneid;
1735 			goto passthru;
1736 		}
1737 
1738 		(void) S10_TRUSS_POINT_5(rval, SYS_zone, 0, cmd, zid, attr,
1739 		    buf, bufsize);
1740 
1741 		len = strlen(aval) + 1;
1742 		if (len > bufsize)
1743 			return (ENAMETOOLONG);
1744 
1745 		if (buf != NULL) {
1746 			if (len == 1) {
1747 				if (s10_uucopy("\0", buf, 1) != 0)
1748 					return (EFAULT);
1749 			} else {
1750 				if (s10_uucopystr(aval, buf, len) != 0)
1751 					return (EFAULT);
1752 
1753 				/*
1754 				 * Assure NULL termination of "buf" as
1755 				 * s10_uucopystr() does NOT.
1756 				 */
1757 				if (s10_uucopy("\0", buf + (len - 1), 1) != 0)
1758 					return (EFAULT);
1759 			}
1760 		}
1761 
1762 		rval->sys_rval1 = len;
1763 		rval->sys_rval2 = 0;
1764 		return (0);
1765 
1766 	default:
1767 		break;
1768 	}
1769 
1770 passthru:
1771 	return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2, arg3,
1772 	    arg4));
1773 }
1774 
/*
 * Tear down a libc stdio stream while leaving its underlying file
 * descriptor open.
 *
 * The descriptor is parked on a duplicate, the stream is closed (which
 * closes the original descriptor), and the duplicate is then moved back
 * onto the original descriptor number.  A NULL stream, a stream without a
 * valid descriptor, or a failure to duplicate leaves everything untouched.
 */
static void
s10_close_fh(FILE *file)
{
	int orig_fd;
	int spare_fd;

	if (file == NULL || (orig_fd = fileno(file)) < 0)
		return;

	/*
	 * We're a branded process but our handler isn't installed yet.  We
	 * can't use the dup() syscall since it no longer exists, so
	 * duplicate the descriptor through fcntl() instead.
	 */
	if ((spare_fd = fcntl(orig_fd, F_DUPFD, 0)) == -1)
		return;

	(void) fclose(file);
	(void) dup2(spare_fd, orig_fd);
	(void) close(spare_fd);
}
1802 
1803 /*ARGSUSED*/
1804 int
1805 s10_init(int argc, char *argv[], char *envp[])
1806 {
1807 	sysret_t		rval;
1808 	s10_brand_reg_t		reg;
1809 	s10_elf_data_t		sed;
1810 	auxv_t			*ap;
1811 	uintptr_t		*p;
1812 	int			i, err;
1813 	char			*bname;
1814 
1815 	/* Sanity check our translation table return value codes */
1816 	for (i = 0; i < NSYSCALL; i++) {
1817 		s10_sysent_table_t *est = &(s10_sysent_table[i]);
1818 		s10_assert(BIT_ONLYONESET(est->st_args & RV_MASK));
1819 	}
1820 
1821 	/*
1822 	 * We need to shutdown all libc stdio.  libc stdio normally goes to
1823 	 * file descriptors, but since we're actually part of a another
1824 	 * process we don't own these file descriptors and we can't make
1825 	 * any assumptions about their state.
1826 	 */
1827 	s10_close_fh(stdin);
1828 	s10_close_fh(stdout);
1829 	s10_close_fh(stderr);
1830 
1831 	/*
1832 	 * Cache the pid of the zone's init process and determine if
1833 	 * we're init(1m) for the zone.  Remember: we might be init
1834 	 * now, but as soon as we fork(2) we won't be.
1835 	 */
1836 	(void) get_initpid_info();
1837 
1838 	/* get the current zoneid */
1839 	err = __systemcall(&rval, SYS_zone, ZONE_LOOKUP, NULL);
1840 	s10_assert(err == 0);
1841 	zoneid = (zoneid_t)rval.sys_rval1;
1842 
1843 	/* Get the zone's emulation bitmap. */
1844 	if ((err = __systemcall(&rval, SYS_zone, ZONE_GETATTR, zoneid,
1845 	    S10_EMUL_BITMAP, emul_bitmap, sizeof (emul_bitmap))) != 0) {
1846 		s10_abort(err, "The zone's patch level is unsupported");
1847 		/*NOTREACHED*/
1848 	}
1849 
1850 	bname = basename(argv[0]);
1851 
1852 	/*
1853 	 * In general we want the S10 commands that are zone-aware to continue
1854 	 * to behave as they normally do within a zone.  Since these commands
1855 	 * are zone-aware, they should continue to "do the right thing".
1856 	 * However, some zone-aware commands aren't going to work the way
1857 	 * we expect them to inside the branded zone.  In particular, the pkg
1858 	 * and patch commands will not properly manage all pkgs/patches
1859 	 * unless the commands think they are running in the global zone.  For
1860 	 * these commands we want to emulate the global zone.
1861 	 *
1862 	 * We don't do any emulation for pkgcond since it is typically used
1863 	 * in pkg/patch postinstall scripts and we want those scripts to do
1864 	 * the right thing inside a zone.
1865 	 *
1866 	 * One issue is the handling of hollow pkgs.  Since the pkgs are
1867 	 * hollow, they won't use pkgcond in their postinstall scripts.  These
1868 	 * pkgs typically are installing drivers so we handle that by
1869 	 * replacing add_drv and rem_drv in the s10_boot script.
1870 	 */
1871 	if (strcmp("pkgadd", bname) == 0 || strcmp("pkgrm", bname) == 0 ||
1872 	    strcmp("patchadd", bname) == 0 || strcmp("patchrm", bname) == 0)
1873 		emul_global_zone = B_TRUE;
1874 
1875 	/*
1876 	 * Register our syscall emulation table with the kernel.
1877 	 * Note that we don't have to do invoke (syscall_number + 1024)
1878 	 * until we've actually establised a syscall emulation callback
1879 	 * handler address, which is what we're doing with this brand
1880 	 * syscall.
1881 	 */
1882 	reg.sbr_version = S10_VERSION;
1883 #ifdef	__x86
1884 	reg.sbr_handler = (caddr_t)s10_handler_table;
1885 #else	/* !__x86 */
1886 	reg.sbr_handler = (caddr_t)s10_handler;
1887 #endif	/* !__x86 */
1888 
1889 	if ((err = __systemcall(&rval, SYS_brand, B_REGISTER, &reg)) != 0) {
1890 		s10_abort(err, "Failed to brand current process");
1891 		/*NOTREACHED*/
1892 	}
1893 
1894 	/* Get data about the executable we're running from the kernel. */
1895 	if ((err = __systemcall(&rval, SYS_brand + 1024,
1896 	    B_ELFDATA, (void *)&sed)) != 0) {
1897 		s10_abort(err,
1898 		    "Failed to get required brand ELF data from the kernel");
1899 		/*NOTREACHED*/
1900 	}
1901 
1902 	/*
1903 	 * Find the aux vector on the stack.
1904 	 */
1905 	p = (uintptr_t *)envp;
1906 	while (*p != NULL)
1907 		p++;
1908 
1909 	/*
1910 	 * p is now pointing at the 0 word after the environ pointers.
1911 	 * After that is the aux vectors.
1912 	 *
1913 	 * The aux vectors are currently pointing to the brand emulation
1914 	 * library and associated linker.  We're going to change them to
1915 	 * point to the brand executable and associated linker (or to no
1916 	 * linker for static binaries).  This matches the process data
1917 	 * stored within the kernel and visible from /proc, which was
1918 	 * all setup in s10_elfexec().  We do this so that when a debugger
1919 	 * attaches to the process it sees the process as a normal solaris
1920 	 * process, this brand emulation library and everything on it's
1921 	 * link map will not be visible, unless our librtld_db plugin
1922 	 * is used.  Note that this is very different from how Linux
1923 	 * branded processes are implemented within lx branded zones.
1924 	 * In that situation, the primary linkmap of the process is the
1925 	 * brand emulation libraries linkmap, not the Linux applications
1926 	 * linkmap.
1927 	 *
1928 	 * We also need to clear the AF_SUN_NOPLM flag from the AT_SUN_AUXFLAGS
1929 	 * aux vector.  This flag told our linker that we don't have a
1930 	 * primary link map.  Now that our linker is done initializing, we
1931 	 * want to clear this flag before we transfer control to the
1932 	 * applications copy of the linker, since we want that linker to have
1933 	 * a primary link map which will be the link map for the application
1934 	 * we're running.
1935 	 */
1936 	p++;
1937 	for (ap = (auxv_t *)p; ap->a_type != AT_NULL; ap++) {
1938 		switch (ap->a_type) {
1939 			case AT_BASE:
1940 				/* Hide AT_BASE if static binary */
1941 				if (sed.sed_base == NULL) {
1942 					ap->a_type = AT_IGNORE;
1943 					ap->a_un.a_val = NULL;
1944 				} else {
1945 					ap->a_un.a_val = sed.sed_base;
1946 				}
1947 				break;
1948 			case AT_ENTRY:
1949 				ap->a_un.a_val = sed.sed_entry;
1950 				break;
1951 			case AT_PHDR:
1952 				ap->a_un.a_val = sed.sed_phdr;
1953 				break;
1954 			case AT_PHENT:
1955 				ap->a_un.a_val = sed.sed_phent;
1956 				break;
1957 			case AT_PHNUM:
1958 				ap->a_un.a_val = sed.sed_phnum;
1959 				break;
1960 			case AT_SUN_AUXFLAGS:
1961 				ap->a_un.a_val &= ~AF_SUN_NOPLM;
1962 				break;
1963 			case AT_SUN_EMULATOR:
1964 				/*
1965 				 * ld.so.1 inspects AT_SUN_EMULATOR to see if
1966 				 * if it is the linker for the brand emulation
1967 				 * library.  Hide AT_SUN_EMULATOR, as the
1968 				 * linker we are about to jump to is the linker
1969 				 * for the binary.
1970 				 */
1971 				ap->a_type = AT_IGNORE;
1972 				ap->a_un.a_val = NULL;
1973 				break;
1974 			case AT_SUN_LDDATA:
1975 				/* Hide AT_SUN_LDDATA if static binary */
1976 				if (sed.sed_lddata == NULL) {
1977 					ap->a_type = AT_IGNORE;
1978 					ap->a_un.a_val = NULL;
1979 				} else {
1980 					ap->a_un.a_val = sed.sed_lddata;
1981 				}
1982 				break;
1983 			default:
1984 				break;
1985 		}
1986 	}
1987 
1988 	s10_runexe(argv, sed.sed_ldentry);
1989 	/*NOTREACHED*/
1990 	s10_abort(0, "s10_runexe() returned");
1991 	return (-1);
1992 }
1993 
1994 /*
1995  * This table must have at least NSYSCALL entries in it.
1996  *
 * Entries are positional: the trailing comment on each line is the
 * slot's index, and slots are looked up by system call number (for
 * example s10_sysent_table[SYS_lwp_create]).  Inserting or removing a
 * line therefore shifts every later entry onto the wrong system call.
 *
1997  * The second parameter of each entry in the s10_sysent_table
1998  * contains the number of parameters and flags that describe the
1999  * syscall return value encoding.  See the block comments at the
2000  * top of this file for more information about the syscall return
2001  * value flags and when they should be used.
 *
 * s10_init() asserts that, for each of the first NSYSCALL slots, the
 * RV_MASK bits of st_args have exactly one bit set, so every entry must
 * carry exactly one return value flag.
 *
 * Several slots are chosen at compile time: slot 0 is emulated only on
 * 32-bit SPARC; slots 123-126 and 166 only on x86; slots 213, 215-217
 * and 223-225 only when _LP64 is not defined.
 *
 * The table is writable on purpose: on amd64, s10_lwp_private() may
 * replace the st_callc member of the SYS_lwp_create slot at run time.
 *
 * NOTE(review): EMULATE and NOSYS are macros defined outside this part
 * of the file; NOSYS presumably marks a call that needs no emulation --
 * confirm against its definition.
2002  */
2003 s10_sysent_table_t s10_sysent_table[] = {
2004 #if defined(__sparc) && !defined(__sparcv9)
2005 	EMULATE(s10_indir, 9 | RV_64RVAL),	/*   0 */
2006 #else
2007 	NOSYS,					/*   0 */
2008 #endif
2009 	NOSYS,					/*   1 */
2010 	EMULATE(s10_forkall, 0 | RV_32RVAL2),	/*   2 */
2011 	NOSYS,					/*   3 */
2012 	NOSYS,					/*   4 */
2013 	EMULATE(s10_open, 3 | RV_DEFAULT),	/*   5 */
2014 	NOSYS,					/*   6 */
2015 	EMULATE(s10_wait, 0 | RV_32RVAL2),	/*   7 */
2016 	EMULATE(s10_creat, 2 | RV_DEFAULT),	/*   8 */
2017 	NOSYS,					/*   9 */
2018 	EMULATE(s10_unlink, 1 | RV_DEFAULT),	/*  10 */
2019 	EMULATE(s10_exec, 2 | RV_DEFAULT),	/*  11 */
2020 	NOSYS,					/*  12 */
2021 	NOSYS,					/*  13 */
2022 	NOSYS,					/*  14 */
2023 	NOSYS,					/*  15 */
2024 	EMULATE(s10_chown, 3 | RV_DEFAULT),	/*  16 */
2025 	NOSYS,					/*  17 */
2026 	EMULATE(s10_stat, 2 | RV_DEFAULT),	/*  18 */
2027 	NOSYS,					/*  19 */
2028 	NOSYS,					/*  20 */
2029 	NOSYS,					/*  21 */
2030 	EMULATE(s10_umount, 1 | RV_DEFAULT),	/*  22 */
2031 	NOSYS,					/*  23 */
2032 	NOSYS,					/*  24 */
2033 	NOSYS,					/*  25 */
2034 	NOSYS,					/*  26 */
2035 	NOSYS,					/*  27 */
2036 	EMULATE(s10_fstat, 2 | RV_DEFAULT),	/*  28 */
2037 	NOSYS,					/*  29 */
2038 	EMULATE(s10_utime, 2 | RV_DEFAULT),	/*  30 */
2039 	NOSYS,					/*  31 */
2040 	NOSYS,					/*  32 */
2041 	EMULATE(s10_access, 2 | RV_DEFAULT),	/*  33 */
2042 	NOSYS,					/*  34 */
2043 	NOSYS,					/*  35 */
2044 	NOSYS,					/*  36 */
2045 	EMULATE(s10_kill, 2 | RV_DEFAULT),	/*  37 */
2046 	NOSYS,					/*  38 */
2047 	NOSYS,					/*  39 */
2048 	NOSYS,					/*  40 */
2049 	EMULATE(s10_dup, 1 | RV_DEFAULT),	/*  41 */
2050 	NOSYS,					/*  42 */
2051 	NOSYS,					/*  43 */
2052 	NOSYS,					/*  44 */
2053 	NOSYS,					/*  45 */
2054 	NOSYS,					/*  46 */
2055 	NOSYS,					/*  47 */
2056 	NOSYS,					/*  48 */
2057 	NOSYS,					/*  49 */
2058 	NOSYS,					/*  50 */
2059 	NOSYS,					/*  51 */
2060 	NOSYS,					/*  52 */
2061 	NOSYS,					/*  53 */
2062 	EMULATE(s10_ioctl, 3 | RV_DEFAULT),	/*  54 */
2063 	NOSYS,					/*  55 */
2064 	NOSYS,					/*  56 */
2065 	NOSYS,					/*  57 */
2066 	NOSYS,					/*  58 */
2067 	EMULATE(s10_execve, 3 | RV_DEFAULT),	/*  59 */
2068 	NOSYS,					/*  60 */
2069 	NOSYS,					/*  61 */
2070 	NOSYS,					/*  62 */
2071 	NOSYS,					/*  63 */
2072 	NOSYS,					/*  64 */
2073 	NOSYS,					/*  65 */
2074 	NOSYS,					/*  66 */
2075 	NOSYS,					/*  67 */
2076 	NOSYS,					/*  68 */
2077 	NOSYS,					/*  69 */
2078 	NOSYS,					/*  70 */
2079 	EMULATE(s10_acctctl, 3 | RV_DEFAULT),	/*  71 */
2080 	NOSYS,					/*  72 */
2081 	NOSYS,					/*  73 */
2082 	NOSYS,					/*  74 */
2083 	EMULATE(s10_issetugid, 0 | RV_DEFAULT),	/*  75 */
2084 	EMULATE(s10_fsat, 6 | RV_DEFAULT),	/*  76 */
2085 	NOSYS,					/*  77 */
2086 	NOSYS,					/*  78 */
2087 	EMULATE(s10_rmdir, 1 | RV_DEFAULT),	/*  79 */
2088 	NOSYS,					/*  80 */
2089 	EMULATE(s10_getdents, 3 | RV_DEFAULT),	/*  81 */
2090 	NOSYS,					/*  82 */
2091 	NOSYS,					/*  83 */
2092 	NOSYS,					/*  84 */
2093 	NOSYS,					/*  85 */
2094 	NOSYS,					/*  86 */
2095 	EMULATE(s10_poll, 3 | RV_DEFAULT),	/*  87 */
2096 	EMULATE(s10_lstat, 2 | RV_DEFAULT),	/*  88 */
2097 	NOSYS,					/*  89 */
2098 	NOSYS,					/*  90 */
2099 	NOSYS,					/*  91 */
2100 	NOSYS,					/*  92 */
2101 	NOSYS,					/*  93 */
2102 	EMULATE(s10_fchown, 3 | RV_DEFAULT),	/*  94 */
2103 	EMULATE(s10_sigprocmask, 3 | RV_DEFAULT), /*  95 */
2104 	EMULATE(s10_sigsuspend, 1 | RV_DEFAULT), /*  96 */
2105 	NOSYS,					/*  97 */
2106 	EMULATE(s10_sigaction, 3 | RV_DEFAULT),	/*  98 */
2107 	EMULATE(s10_sigpending, 2 | RV_DEFAULT), /*  99 */
2108 	NOSYS,					/* 100 */
2109 	NOSYS,					/* 101 */
2110 	NOSYS,					/* 102 */
2111 	NOSYS,					/* 103 */
2112 	NOSYS,					/* 104 */
2113 	NOSYS,					/* 105 */
2114 	NOSYS,					/* 106 */
2115 	EMULATE(s10_waitid, 4 | RV_DEFAULT),	/* 107 */
2116 	EMULATE(s10_sigsendsys, 2 | RV_DEFAULT), /* 108 */
2117 	NOSYS,					/* 109 */
2118 	NOSYS,					/* 110 */
2119 	NOSYS,					/* 111 */
2120 	NOSYS,					/* 112 */
2121 	NOSYS,					/* 113 */
2122 	NOSYS,					/* 114 */
2123 	NOSYS,					/* 115 */
2124 	NOSYS,					/* 116 */
2125 	NOSYS,					/* 117 */
2126 	NOSYS,					/* 118 */
2127 	NOSYS,					/* 119 */
2128 	NOSYS,					/* 120 */
2129 	NOSYS,					/* 121 */
2130 	NOSYS,					/* 122 */
2131 #if defined(__x86)
2132 	EMULATE(s10_xstat, 3 | RV_DEFAULT),	/* 123 */
2133 	EMULATE(s10_lxstat, 3 | RV_DEFAULT),	/* 124 */
2134 	EMULATE(s10_fxstat, 3 | RV_DEFAULT),	/* 125 */
2135 	EMULATE(s10_xmknod, 4 | RV_DEFAULT),	/* 126 */
2136 #else
2137 	NOSYS,					/* 123 */
2138 	NOSYS,					/* 124 */
2139 	NOSYS,					/* 125 */
2140 	NOSYS,					/* 126 */
2141 #endif
2142 	NOSYS,					/* 127 */
2143 	NOSYS,					/* 128 */
2144 	NOSYS,					/* 129 */
2145 	EMULATE(s10_lchown, 3 | RV_DEFAULT),	/* 130 */
2146 	NOSYS,					/* 131 */
2147 	NOSYS,					/* 132 */
2148 	NOSYS,					/* 133 */
2149 	EMULATE(s10_rename, 2 | RV_DEFAULT),	/* 134 */
2150 	EMULATE(s10_uname, 1 | RV_DEFAULT),	/* 135 */
2151 	NOSYS,					/* 136 */
2152 	EMULATE(s10_sysconfig, 1 | RV_DEFAULT),	/* 137 */
2153 	NOSYS,					/* 138 */
2154 	EMULATE(s10_sysinfo, 3 | RV_DEFAULT),	/* 139 */
2155 	NOSYS,					/* 140 */
2156 	NOSYS,					/* 141 */
2157 	NOSYS,					/* 142 */
2158 	EMULATE(s10_fork1, 0 | RV_32RVAL2),	/* 143 */
2159 	EMULATE(s10_sigtimedwait, 3 | RV_DEFAULT), /* 144 */
2160 	NOSYS,					/* 145 */
2161 	NOSYS,					/* 146 */
2162 	EMULATE(s10_lwp_sema_wait, 1 | RV_DEFAULT), /* 147 */
2163 	NOSYS,					/* 148 */
2164 	NOSYS,					/* 149 */
2165 	NOSYS,					/* 150 */
2166 	NOSYS,					/* 151 */
2167 	NOSYS,					/* 152 */
2168 	NOSYS,					/* 153 */
2169 	EMULATE(s10_utimes, 2 | RV_DEFAULT),	/* 154 */
2170 	NOSYS,					/* 155 */
2171 	NOSYS,					/* 156 */
2172 	NOSYS,					/* 157 */
2173 	NOSYS,					/* 158 */
2174 	EMULATE(s10_lwp_create, 3 | RV_DEFAULT), /* 159 */
2175 	NOSYS,					/* 160 */
2176 	NOSYS,					/* 161 */
2177 	NOSYS,					/* 162 */
2178 	EMULATE(s10_lwp_kill, 2 | RV_DEFAULT),	/* 163 */
2179 	NOSYS,					/* 164 */
2180 	EMULATE(s10_lwp_sigmask, 3 | RV_32RVAL2), /* 165 */
2181 #if defined(__x86)
2182 	EMULATE(s10_lwp_private, 3 | RV_DEFAULT), /* 166 */
2183 #else
2184 	NOSYS,					/* 166 */
2185 #endif
2186 	NOSYS,					/* 167 */
2187 	NOSYS,					/* 168 */
2188 	EMULATE(s10_lwp_mutex_lock, 1 | RV_DEFAULT), /* 169 */
2189 	NOSYS,					/* 170 */
2190 	NOSYS,					/* 171 */
2191 	NOSYS,					/* 172 */
2192 	NOSYS,					/* 173 */
2193 	EMULATE(s10_pwrite, 4 | RV_DEFAULT),	/* 174 */
2194 	NOSYS,					/* 175 */
2195 	NOSYS,					/* 176 */
2196 	NOSYS,					/* 177 */
2197 	NOSYS,					/* 178 */
2198 	NOSYS,					/* 179 */
2199 	NOSYS,					/* 180 */
2200 	NOSYS,					/* 181 */
2201 	NOSYS,					/* 182 */
2202 	NOSYS,					/* 183 */
2203 	NOSYS,					/* 184 */
2204 	NOSYS,					/* 185 */
2205 	EMULATE(s10_auditsys, 4 | RV_64RVAL),	/* 186 */
2206 	NOSYS,					/* 187 */
2207 	NOSYS,					/* 188 */
2208 	NOSYS,					/* 189 */
2209 	EMULATE(s10_sigqueue, 4 | RV_DEFAULT),	/* 190 */
2210 	NOSYS,					/* 191 */
2211 	NOSYS,					/* 192 */
2212 	NOSYS,					/* 193 */
2213 	NOSYS,					/* 194 */
2214 	NOSYS,					/* 195 */
2215 	NOSYS,					/* 196 */
2216 	NOSYS,					/* 197 */
2217 	NOSYS,					/* 198 */
2218 	NOSYS,					/* 199 */
2219 	NOSYS,					/* 200 */
2220 	NOSYS,					/* 201 */
2221 	NOSYS,					/* 202 */
2222 	NOSYS,					/* 203 */
2223 	NOSYS,					/* 204 */
2224 	EMULATE(s10_signotify, 3 | RV_DEFAULT),	/* 205 */
2225 	NOSYS,					/* 206 */
2226 	NOSYS,					/* 207 */
2227 	NOSYS,					/* 208 */
2228 	NOSYS,					/* 209 */
2229 	EMULATE(s10_lwp_mutex_timedlock, 2 | RV_DEFAULT), /* 210 */
2230 	NOSYS,					/* 211 */
2231 	NOSYS,					/* 212 */
2232 #if defined(_LP64)
2233 	NOSYS,					/* 213 */
2234 #else
2235 	EMULATE(s10_getdents64, 3 | RV_DEFAULT), /* 213 */
2236 #endif
2237 	NOSYS,					/* 214 */
2238 #if defined(_LP64)
2239 	NOSYS,					/* 215 */
2240 	NOSYS,					/* 216 */
2241 	NOSYS,					/* 217 */
2242 #else
2243 	EMULATE(s10_stat64, 2 | RV_DEFAULT),	/* 215 */
2244 	EMULATE(s10_lstat64, 2 | RV_DEFAULT),	/* 216 */
2245 	EMULATE(s10_fstat64, 2 | RV_DEFAULT),	/* 217 */
2246 #endif
2247 	NOSYS,					/* 218 */
2248 	NOSYS,					/* 219 */
2249 	NOSYS,					/* 220 */
2250 	NOSYS,					/* 221 */
2251 	NOSYS,					/* 222 */
2252 #if defined(_LP64)
2253 	NOSYS,					/* 223 */
2254 	NOSYS,					/* 224 */
2255 	NOSYS,					/* 225 */
2256 #else
2257 	EMULATE(s10_pwrite64, 5 | RV_DEFAULT),	/* 223 */
2258 	EMULATE(s10_creat64, 2 | RV_DEFAULT),	/* 224 */
2259 	EMULATE(s10_open64, 3 | RV_DEFAULT),	/* 225 */
2260 #endif
2261 	NOSYS,					/* 226 */
2262 	EMULATE(s10_zone, 5 | RV_DEFAULT),	/* 227 */
2263 	NOSYS,					/* 228 */
2264 	NOSYS,					/* 229 */
2265 	NOSYS,					/* 230 */
2266 	NOSYS,					/* 231 */
2267 	NOSYS,					/* 232 */
2268 	NOSYS,					/* 233 */
2269 	NOSYS,					/* 234 */
2270 	NOSYS,					/* 235 */
2271 	NOSYS,					/* 236 */
2272 	NOSYS,					/* 237 */
2273 	NOSYS,					/* 238 */
2274 	NOSYS,					/* 239 */
2275 	NOSYS,					/* 240 */
2276 	NOSYS,					/* 241 */
2277 	NOSYS,					/* 242 */
2278 	NOSYS,					/* 243 */
2279 	NOSYS,					/* 244 */
2280 	NOSYS,					/* 245 */
2281 	NOSYS,					/* 246 */
2282 	NOSYS,					/* 247 */
2283 	NOSYS,					/* 248 */
2284 	NOSYS,					/* 249 */
2285 	NOSYS,					/* 250 */
2286 	EMULATE(s10_lwp_mutex_trylock, 1 | RV_DEFAULT), /* 251 */
2287 	NOSYS,					/* 252 */
2288 	NOSYS,					/* 253 */
2289 	NOSYS,					/* 254 */
2290 	NOSYS					/* 255 */
2291 };
2292