/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

/*
 * This file contains common code for handling Options Management requests.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/errno.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/ddi.h>
#include <sys/debug.h>		/* for ASSERT */
#include <sys/policy.h>

#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <netinet/ip6.h>
#include <inet/ip.h>
#include <inet/mib2.h>
#include <netinet/in.h>
#include "optcom.h"

#include <inet/optcom.h>
#include <inet/ipclassifier.h>
#include <inet/proto_set.h>

/*
 * Prototypes for the file-local helpers used by tpi_optcom_req():
 * the two passes over the request's option buffer, the per-operation
 * workers (default, current, check/negotiate) and the small utilities
 * for option levels, option lengths and reply buffer sizing.
 */
static t_scalar_t process_topthdrs_first_pass(mblk_t *, cred_t *, optdb_obj_t *,
    size_t *);
static t_scalar_t do_options_second_pass(queue_t *q, mblk_t *reqmp,
    mblk_t *ack_mp, cred_t *, optdb_obj_t *dbobjp,
    t_uscalar_t *worst_statusp);
static t_uscalar_t get_worst_status(t_uscalar_t, t_uscalar_t);
static int do_opt_default(queue_t *, struct T_opthdr *, uchar_t **,
    t_uscalar_t *, cred_t *, optdb_obj_t *);
static void do_opt_current(queue_t *, struct T_opthdr *, uchar_t **,
    t_uscalar_t *, cred_t *cr, optdb_obj_t *);
static void do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
    uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
    cred_t *, optdb_obj_t *dbobjp);
static boolean_t opt_level_valid(t_uscalar_t, optlevel_t *, uint_t);
static size_t opt_level_allopts_lengths(t_uscalar_t, opdes_t *, uint_t);
static boolean_t opt_length_ok(opdes_t *, t_uscalar_t optlen);
static t_uscalar_t optcom_max_optbuf_len(opdes_t *, uint_t);
static boolean_t opt_bloated_maxsize(opdes_t *);

/*
 * Common code for sending back a T_ERROR_ACK.
 *
 * 'mp' is handed to mi_tpi_err_ack_alloc() together with the TLI error
 * 't_error' and the system error 'sys_error'.  If that call yields a
 * message it is sent back with qreply(); otherwise nothing is sent.
 */
void
optcom_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
{
	mblk_t *ack_mp;

	ack_mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error);
	if (ack_mp == NULL)
		return;

	qreply(q, ack_mp);
}

/*
 * The option management routines svr4_optcom_req() and tpi_optcom_req() use
 * callback functions as arguments. Here are the interfaces expected of
 * the callback functions:
 *
 *
 * (1) deffn(q, optlevel, optname, optvalp)
 *
 *	- Function only called when default value comes from protocol
 *	 specific code and not the option database table (indicated by
 *	  OP_DEF_FN property in option database.)
 *	- Error return is -1. Valid returns are >=0.
 *	- When valid, the return value represents the length used for storing
 *		the default value of the option.
 *      - Error return implies the called routine did not recognize this
 *              option. Something downstream could so input is left unchanged
 *              in request buffer.
 *
 * (2) getfn(q, optlevel, optname, optvalp)
 *
 *	- Error return is -1. Valid returns are >=0.
 *	- When valid, the return value represents the length used for storing
 *		the actual value of the option.
 *      - Error return implies the called routine did not recognize this
 *              option. Something downstream could so input is left unchanged
 *              in request buffer.
 *
 * (3) setfn(q, optset_context, optlevel, optname, inlen, invalp,
 *	outlenp, outvalp, attrp, cr);
 *
 *	- OK return is 0, Error code is returned as a non-zero argument.
 *      - If negative it is ignored by svr4_optcom_req(). If positive, error
 *        is returned. A negative return implies that option, while handled on
 *	  this stack is not handled at this level and will be handled further
 *	  downstream.
 *	- Both negative and positive errors are treated as errors in an
 *	  identical manner by tpi_optcom_req(). The errors affect the "status"
 *	  field of each option's T_opthdr. If successful, an appropriate
 *	  success result is carried. If error, it is instantiated to "failure"
 *	  at the topmost level and left unchanged at other levels. (This
 *	  "failure" can turn to a success at another level).
 *	- optset_context passed for tpi_optcom_req(). It is interpreted as:
 *        - SETFN_OPTCOM_CHECKONLY
 *		semantics are to pretend to set the value and report
 *		back if it would be successful.
 *		This is used with T_CHECK semantics in XTI
 *        - SETFN_OPTCOM_NEGOTIATE
 *		set the value. Call from option management primitive
 *		T_OPTMGMT_REQ when T_NEGOTIATE flags is used.
 *	  - SETFN_UD_NEGOTIATE
 *		option request came riding on a UNITDATA primitive and most
 *		often has "this datagram" semantics, to influence properties
 *		affecting an outgoing datagram or associated with a received
 *		datagram.
 *		[ Note: XTI permits this use outside of "this datagram"
 *		semantics also and permits setting "management related"
 *		options in this	context and its test suite enforces it ]
 *	  - SETFN_CONN_NEGOTIATE
 *		option request came riding on a CONN_REQ/RES primitive and
 *		most often has "this connection" (negotiation during
 *		"connection establishment") semantics.
 *		[ Note: XTI permits use of these outside of "this connection"
 *		semantics and permits "management related" options in this
 *		context and its test suite enforces it. ]
 *
 *	- inlen, invalp is the option length,value requested to be set.
 *	- outlenp, outvalp represent return parameters which contain the
 *	  value set and it might be different from one passed on input.
 *	- attrp points to a data structure that's used by v6 modules to
 *	  store ancillary data options or sticky options.
 *	- cr points to the caller's credentials
 *	- the caller might pass same buffers for input and output and the
 *	  routine should protect against this case by not updating output
 *	  buffers until it is done referencing input buffers and any other
 *	  issues (e.g. not use bcopy() if we do not trust what it does).
 *      - If option is not known, it returns error. We randomly pick EINVAL.
 *        It can however get called with options that are handled downstream
 *        or upstream so for svr4_optcom_req(), it does not return error for
 *        negative return values.
 *
 */

/*
 * Upper Level Protocols call this routine when they receive
 * a T_SVR4_OPTMGMT_REQ message.  They supply callback functions
 * for setting a new value for a single options, getting the
 * current value for a single option, and checking for support
 * of a single option.  svr4_optcom_req validates the option management
 * buffer passed in, and calls the appropriate routines to do the
 * job requested.
 *
 * Arguments:
 *	q	- queue the request arrived on; replies go out via qreply().
 *	mp	- the request message.  It is consumed on every path: it is
 *		  either reused for the reply/error ack or freed.
 *	cr	- credentials used for the option permission checks.
 *	dbobjp	- option database object supplying the option table and the
 *		  default/get/set callbacks.
 *
 * Replies:
 *	T_OPTMGMT_ACK on success; otherwise a T_ERROR_ACK carrying TBADOPT,
 *	TBADFLAG, TSYSERR/ENOMEM, or the error reported by proto_opt_check()
 *	or the set callback.
 *
 * XXX Code below needs some restructuring after we have some more
 * macros to support 'struct opthdr' in the headers.
 */
void
svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp)
{
	pfi_t	deffn = dbobjp->odb_deffn;
	pfi_t	getfn = dbobjp->odb_getfn;
	opt_set_fn setfn = dbobjp->odb_setfn;
	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
	t_uscalar_t max_optbuf_len;
	int len;
	mblk_t	*mp1 = NULL;
	struct opthdr *next_opt;
	struct opthdr *opt;
	struct opthdr *opt1;
	struct opthdr *opt_end;
	struct opthdr *opt_start;
	opdes_t	*optd;
	struct T_optmgmt_ack *toa;
	struct T_optmgmt_req *tor;
	int error;

	tor = (struct T_optmgmt_req *)mp->b_rptr;
	/* Verify message integrity. */
	if (mp->b_wptr - mp->b_rptr < sizeof (struct T_optmgmt_req))
		goto bad_opt;
	/* Verify MGMT_flags legal */
	switch (tor->MGMT_flags) {
	case T_DEFAULT:
	case T_NEGOTIATE:
	case T_CURRENT:
	case T_CHECK:
		/* OK - legal request flags */
		break;
	default:
		optcom_err_ack(q, mp, TBADFLAG, 0);
		return;
	}
	if (tor->MGMT_flags == T_DEFAULT) {
		/* Is it a request for default option settings? */

		/*
		 * Note: XXX TLI and TPI specification was unclear about
		 * semantics of T_DEFAULT and the following historical note
		 * and its interpretation is incorrect (it implies a request
		 * for default values of only the identified options not all.
		 * The semantics have been explained better in XTI spec.)
		 * However, we do not modify (comment or code) here to keep
		 * compatibility.
		 * We can rethink this if it ever becomes an issue.
		 * ----historical comment start------
		 * As we understand it, the input buffer is meaningless
		 * so we ditch the message.  A T_DEFAULT request is a
		 * request to obtain a buffer containing defaults for
		 * all supported options, so we allocate a maximum length
		 * reply.
		 * ----historical comment end -------
		 */
		/* T_DEFAULT not passed down */
		max_optbuf_len = optcom_max_optbuf_len(opt_arr,
		    opt_arr_cnt);
		/*
		 * Allocate the reply before releasing the request: if the
		 * allocation fails, the request message is still needed so
		 * that an ENOMEM error ack can be built from it.
		 */
		mp1 = allocb(max_optbuf_len, BPRI_MED);
		if (mp1 == NULL)
			goto no_mem;
		freemsg(mp);
		mp = mp1;

		/* Initialize the T_optmgmt_ack header. */
		toa = (struct T_optmgmt_ack *)mp->b_rptr;
		bzero((char *)toa, max_optbuf_len);
		toa->PRIM_type = T_OPTMGMT_ACK;
		toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
		/* TODO: Is T_DEFAULT the right thing to put in MGMT_flags? */
		toa->MGMT_flags = T_DEFAULT;

		/* Now walk the table of options passed in */
		opt = (struct opthdr *)&toa[1];
		for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
			/*
			 * All the options in the table of options passed
			 * in are by definition supported by the protocol
			 * calling this function.
			 */
			if (!OA_READ_PERMISSION(optd, cr))
				continue;
			opt->level = optd->opdes_level;
			opt->name = optd->opdes_name;
			if (!(optd->opdes_props & OP_DEF_FN) ||
			    ((len = (*deffn)(q, opt->level,
			    opt->name, (uchar_t *)&opt[1])) < 0)) {
				/*
				 * Fill length and value from table.
				 *
				 * Default value not instantiated from function
				 * (or the protocol specific function failed it;
				 * In this interpretation of T_DEFAULT, this is
				 * the best we can do)
				 */
				switch (optd->opdes_size) {
				/*
				 * Since options are guaranteed aligned only
				 * on a 4 byte boundary (t_scalar_t) any
				 * option that is greater in size will default
				 * to the bcopy below
				 */
				case sizeof (int32_t):
					*(int32_t *)&opt[1] =
					    (int32_t)optd->opdes_default;
					break;
				case sizeof (int16_t):
					*(int16_t *)&opt[1] =
					    (int16_t)optd->opdes_default;
					break;
				case sizeof (int8_t):
					*(int8_t *)&opt[1] =
					    (int8_t)optd->opdes_default;
					break;
				default:
					/*
					 * other length but still assume
					 * fixed - use bcopy
					 */
					bcopy(optd->opdes_defbuf,
					    &opt[1], optd->opdes_size);
					break;
				}
				opt->len = optd->opdes_size;
			} else {
				/* deffn() stored the value and gave its length */
				opt->len = (t_uscalar_t)len;
			}
			opt = (struct opthdr *)((char *)&opt[1] +
			    _TPI_ALIGN_OPT(opt->len));
		}

		/* Now record the final length. */
		toa->OPT_length = (t_scalar_t)((char *)opt - (char *)&toa[1]);
		mp->b_wptr = (uchar_t *)opt;
		mp->b_datap->db_type = M_PCPROTO;
		/* Ship it back. */
		qreply(q, mp);
		return;
	}
	/* T_DEFAULT processing complete - no more T_DEFAULT */

	/*
	 * For T_NEGOTIATE, T_CURRENT, and T_CHECK requests, we make a
	 * pass through the input buffer validating the details and
	 * making sure each option is supported by the protocol.
	 */
	if ((opt_start = (struct opthdr *)mi_offset_param(mp,
	    tor->OPT_offset, tor->OPT_length)) == NULL)
		goto bad_opt;
	if (!__TPI_OPT_ISALIGNED(opt_start))
		goto bad_opt;

	opt_end = (struct opthdr *)((uchar_t *)opt_start +
	    tor->OPT_length);

	for (opt = opt_start; opt < opt_end; opt = next_opt) {
		/*
		 * Verify we have room to reference the option header
		 * fields in the option buffer.
		 */
		if ((uchar_t *)opt + sizeof (struct opthdr) >
		    (uchar_t *)opt_end)
			goto bad_opt;
		/*
		 * We now compute the pointer to the next option in the
		 * buffer, 'next_opt'. The computation below uses 'opt->len',
		 * which is initialized by the application and cannot be
		 * trusted. The usual case of a value that is too large will
		 * be caught by the loop termination condition above. We
		 * check for the following, which it will miss:
		 *	-pointer space wraparound arithmetic overflow
		 *	-last option in buffer with 'opt->len' being too large
		 *	 (only reason 'next_opt' should equal or exceed
		 *	 'opt_end' for last option is roundup unless length is
		 *	 too-large/invalid)
		 */
		next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
		    _TPI_ALIGN_OPT(opt->len));

		if ((uchar_t *)next_opt < (uchar_t *)&opt[1] ||
		    ((next_opt >= opt_end) &&
		    (((uchar_t *)next_opt - (uchar_t *)opt_end) >=
		    __TPI_ALIGN_SIZE)))
			goto bad_opt;

		/* sanity check */
		if (opt->name == T_ALLOPT)
			goto bad_opt;

		/*
		 * A negative result is a negated TLI error; a positive
		 * result is a system error reported with TSYSERR.
		 */
		error = proto_opt_check(opt->level, opt->name, opt->len, NULL,
		    opt_arr, opt_arr_cnt,
		    tor->MGMT_flags == T_NEGOTIATE, tor->MGMT_flags == T_CHECK,
		    cr);
		if (error < 0) {
			optcom_err_ack(q, mp, -error, 0);
			return;
		} else if (error > 0) {
			optcom_err_ack(q, mp, TSYSERR, error);
			return;
		}
	} /* end for loop scanning option buffer */

	/* Now complete the operation as required. */
	switch (tor->MGMT_flags) {
	case T_CHECK:
		/*
		 * Historically used same as T_CURRENT (which was added to
		 * standard later). Code retained for compatibility.
		 */
		/* FALLTHROUGH */
	case T_CURRENT:
		/*
		 * Allocate a maximum size reply.  Perhaps we are supposed to
		 * assume that the input buffer includes space for the answers
		 * as well as the opthdrs, but we don't know that for sure.
		 * So, instead, we create a new output buffer, using the
		 * input buffer only as a list of options.
		 *
		 * NOTE(review): the reply is sized from the option table, not
		 * from the request, and the loop below does not bound-check
		 * each write. Confirm that a request cannot name enough
		 * options to exceed max_optbuf_len.
		 */
		max_optbuf_len = optcom_max_optbuf_len(opt_arr,
		    opt_arr_cnt);
		mp1 = allocb_tmpl(max_optbuf_len, mp);
		if (!mp1)
			goto no_mem;
		/* Initialize the header. */
		mp1->b_datap->db_type = M_PCPROTO;
		mp1->b_wptr = &mp1->b_rptr[sizeof (struct T_optmgmt_ack)];
		toa = (struct T_optmgmt_ack *)mp1->b_rptr;
		toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
		toa->MGMT_flags = tor->MGMT_flags;
		/*
		 * Walk through the input buffer again, this time adding
		 * entries to the output buffer for each option requested.
		 * Note, sanity of option header, last option etc, verified
		 * in first pass.
		 */
		opt1 = (struct opthdr *)&toa[1];

		for (opt = opt_start; opt < opt_end; opt = next_opt) {

			next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
			    _TPI_ALIGN_OPT(opt->len));

			opt1->name = opt->name;
			opt1->level = opt->level;
			len = (*getfn)(q, opt->level,
			    opt->name, (uchar_t *)&opt1[1]);
			/*
			 * Failure means option is not recognized. Copy input
			 * buffer as is
			 */
			if (len < 0) {
				opt1->len = opt->len;
				bcopy(&opt[1], &opt1[1], opt->len);
			} else {
				opt1->len = (t_uscalar_t)len;
			}
			opt1 = (struct opthdr *)((uchar_t *)&opt1[1] +
			    _TPI_ALIGN_OPT(opt1->len));
		} /* end for loop */

		/* Record the final length. */
		toa->OPT_length = (t_scalar_t)((uchar_t *)opt1 -
		    (uchar_t *)&toa[1]);
		mp1->b_wptr = (uchar_t *)opt1;
		/* Ditch the input buffer. */
		freemsg(mp);
		mp = mp1;
		break;

	case T_NEGOTIATE:
		/*
		 * Here we are expecting that the response buffer is exactly
		 * the same size as the input buffer.  We pass each opthdr
		 * to the protocol's set function.  If the protocol doesn't
		 * like it, it can update the value in it return argument.
		 */
		/*
		 * Pass each negotiated option through the protocol set
		 * function.
		 * Note: sanity check on option header values done in first
		 * pass and not repeated here.
		 */
		toa = (struct T_optmgmt_ack *)tor;

		for (opt = opt_start; opt < opt_end; opt = next_opt) {
			next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
			    _TPI_ALIGN_OPT(opt->len));

			/* Input and output share the request's own buffer. */
			error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE,
			    opt->level, opt->name,
			    opt->len, (uchar_t *)&opt[1],
			    &opt->len, (uchar_t *)&opt[1], NULL, cr);
			/*
			 * Treat positive "errors" as real.
			 * Note: negative errors are to be treated as
			 * non-fatal by svr4_optcom_req() and are
			 * returned by setfn() when it is passed an
			 * option it does not handle. Since the option
			 * passed proto_opt_lookup(), it is implied that
			 * it is valid but was either handled upstream
			 * or will be handled downstream.
			 */
			if (error > 0) {
				optcom_err_ack(q, mp, TSYSERR, error);
				return;
			}
			/*
			 * error < 0 means option is not recognized.
			 */
		}
		break;
	default:
		optcom_err_ack(q, mp, TBADFLAG, 0);
		return;
	}

	/* Set common fields in the header. */
	toa->MGMT_flags = T_SUCCESS;
	mp->b_datap->db_type = M_PCPROTO;
	toa->PRIM_type = T_OPTMGMT_ACK;
	qreply(q, mp);
	return;
no_mem:
	/* 'mp' is still the original request on every path that gets here. */
	optcom_err_ack(q, mp, TSYSERR, ENOMEM);
	return;
bad_opt:
	optcom_err_ack(q, mp, TBADOPT, 0);
}

/*
 * New optcom_req inspired by TPI/XTI semantics.
 *
 * The request in 'mp' is handled in two passes over its option buffer,
 * mostly to accommodate variable length options and the "T_ALLOPT" option
 * ("all options of the specified level"):
 *
 *	1. process_topthdrs_first_pass() validates the buffer, sets the
 *	   "status" field of each option and works out how large the
 *	   reply has to be.
 *	2. do_options_second_pass() acts on each option according to that
 *	   status and writes the results into a newly allocated reply.
 *
 * On success a T_OPTMGMT_ACK is sent and the request is freed; on any
 * failure the request is handed to optcom_err_ack().
 */
void
tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp)
{
	struct T_optmgmt_req *req = (struct T_optmgmt_req *)mp->b_rptr;
	struct T_optmgmt_ack *ack;
	mblk_t *ack_mp;
	size_t ack_len = 0;
	t_uscalar_t worst = T_SUCCESS;
	t_scalar_t terr;

	/* The message must at least hold the request header. */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_optmgmt_req)) {
		optcom_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/* Only these four request flags are legal. */
	if (req->MGMT_flags != T_DEFAULT &&
	    req->MGMT_flags != T_NEGOTIATE &&
	    req->MGMT_flags != T_CURRENT &&
	    req->MGMT_flags != T_CHECK) {
		optcom_err_ack(q, mp, TBADFLAG, 0);
		return;
	}

	/*
	 * First pass: validate the input, record a per-option status and
	 * obtain the cumulative length needed for the results. (Variable
	 * length options and T_ALLOPT mean the output length can differ
	 * from the input length.)
	 */
	terr = process_topthdrs_first_pass(mp, cr, dbobjp, &ack_len);
	if (terr != 0) {
		optcom_err_ack(q, mp, terr, 0);
		return;
	}

	/* The input looked fine so far; get a buffer for the reply. */
	ack_mp = allocb_tmpl(ack_len, mp);
	if (ack_mp == NULL) {
		optcom_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	/*
	 * Second pass: act on each option based on the status from the
	 * first pass, expanding T_ALLOPT into all options of its level.
	 */
	terr = do_options_second_pass(q, mp, ack_mp, cr, dbobjp, &worst);
	if (terr != 0) {
		freemsg(ack_mp);
		optcom_err_ack(q, mp, terr, 0);
		return;
	}

	/*
	 * Following code relies on the coincidence that T_optmgmt_req
	 * and T_optmgmt_ack are identical in binary representation.
	 * The request flags are read before the request is freed.
	 */
	ack = (struct T_optmgmt_ack *)ack_mp->b_rptr;
	ack->PRIM_type = T_OPTMGMT_ACK;
	ack->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
	ack->OPT_length = (t_scalar_t)(ack_mp->b_wptr - (ack_mp->b_rptr +
	    sizeof (struct T_optmgmt_ack)));
	/* XXX "worst" or "OR" TPI ? */
	ack->MGMT_flags = req->MGMT_flags | worst;
	ack_mp->b_datap->db_type = M_PCPROTO;

	freemsg(mp);		/* done with the input mblk */
	qreply(q, ack_mp);
}


/*
 * First pass over the T_opthdr option buffer carried in 'mp'.
 *
 * For every option in the request this routine
 *	- stores a "status" in the option header (in place, in the request)
 *	  based on table lookup, permissions and option length checks, and
 *	- adds the space the option will need in the reply to '*toa_lenp',
 *	  which starts out as the size of the T_optmgmt_ack header.
 *
 * Options that will simply be echoed back (any non-success status set
 * here, and T_READONLY for T_CHECK/T_NEGOTIATE) are accounted for with
 * their own aligned length; options that will be processed are accounted
 * for with a header plus the aligned size from the option table.
 *
 * Returns 0 if the buffer is acceptable, otherwise TBADOPT or TBADFLAG.
 */

static t_scalar_t
process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
    size_t *toa_lenp)
{
	struct T_optmgmt_req *tor = (struct T_optmgmt_req *)mp->b_rptr;
	opdes_t	*table = dbobjp->odb_opt_des_arr;
	uint_t table_cnt = dbobjp->odb_opt_arr_cnt;
	optlevel_t *levels = dbobjp->odb_valid_levels_arr;
	uint_t levels_cnt = dbobjp->odb_valid_levels_arr_cnt;
	struct T_opthdr *opt_start;
	struct T_opthdr *opt_end;
	struct T_opthdr *opt;
	opdes_t	*optd;

	*toa_lenp = sizeof (struct T_optmgmt_ack);

	opt_start = (struct T_opthdr *)mi_offset_param(mp, tor->OPT_offset,
	    tor->OPT_length);
	if (opt_start == NULL)
		return (TBADOPT);
	if (!__TPI_TOPT_ISALIGNED(opt_start))
		return (TBADOPT);

	opt_end = (struct T_opthdr *)((uchar_t *)opt_start + tor->OPT_length);

	/*
	 * Note: fields of 'tor' are deliberately re-read on each use rather
	 * than cached, since the option headers written below live in the
	 * same message.
	 */
	for (opt = opt_start; opt && (opt < opt_end);
	    opt = _TPI_TOPT_NEXTHDR(opt_start, tor->OPT_length, opt)) {
		/*
		 * Length and alignment must check out before anything
		 * in the option is looked at.
		 */
		if (!(_TPI_TOPT_VALID(opt, opt_start, opt_end)))
			return (TBADOPT);

		if (opt->name == T_ALLOPT) {
			size_t level_len = 0;

			/*
			 * T_ALLOPT is a special case. It means nothing for
			 * T_CHECK, so that combination fails outright.
			 * Otherwise opt_level_allopts_lengths() doubles as
			 * the check that the level is supported: a zero
			 * length means it is not.
			 */
			if (tor->MGMT_flags != T_CHECK) {
				level_len = opt_level_allopts_lengths(
				    opt->level, table, table_cnt);
			}
			if (level_len == 0) {
				opt->status = T_FAILURE;
				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
			} else {
				opt->status = T_SUCCESS;
				*toa_lenp += level_len;
			}
			continue;
		}

		/* A specific option: find it in the option table. */
		optd = proto_opt_lookup(opt->level, opt->name, table,
		    table_cnt);
		if (optd == NULL) {
			/*
			 * Unknown option. XTI requires that the level be
			 * checked for validity. The TPI provider always
			 * initializes the "not supported" status for such
			 * options; other levels leave the status alone if
			 * they do not understand an option.
			 */
			if (!opt_level_valid(opt->level, levels, levels_cnt))
				return (TBADOPT);
			opt->status = T_NOTSUPPORT;
			*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
			continue;
		}

		/*
		 * The lookup approved of this option, so from here on the
		 * status is derived from the permissions for the requested
		 * operation. (This can override a status set at a higher
		 * level.)
		 */
		if (tor->MGMT_flags == T_DEFAULT ||
		    tor->MGMT_flags == T_CURRENT) {
			/*
			 * T_CURRENT needs read access, else T_NOTSUPPORT.
			 * The specification is unclear for T_DEFAULT; we
			 * interpret it as needing read access as well.
			 */
			if (!OA_READ_PERMISSION(optd, cr)) {
				opt->status = T_NOTSUPPORT;
				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
				continue;
			}

			/* Read access only means T_READONLY. */
			opt->status = OA_READONLY_PERMISSION(optd, cr) ?
			    T_READONLY : T_SUCCESS;

			/*
			 * Make room in the ack. The size in the table does
			 * not include the option header.
			 */
			*toa_lenp += sizeof (struct T_opthdr) +
			    _TPI_ALIGN_TOPT(optd->opdes_size);
		} else if (tor->MGMT_flags == T_CHECK ||
		    tor->MGMT_flags == T_NEGOTIATE) {
			/*
			 * T_NEGOTIATE always carries a value; T_CHECK may
			 * omit it (header-only option). When a value is
			 * present its size must be acceptable for the
			 * option, else the status is T_FAILURE.
			 */
			if ((tor->MGMT_flags == T_NEGOTIATE ||
			    (opt->len != sizeof (struct T_opthdr))) &&
			    (opt->len < sizeof (struct T_opthdr) ||
			    !opt_length_ok(optd,
			    opt->len - sizeof (struct T_opthdr)))) {
				opt->status = T_FAILURE;
				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
				continue;
			}

			/* Read is the only access permitted. */
			if (OA_READONLY_PERMISSION(optd, cr)) {
				opt->status = T_READONLY;
				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
				continue;
			}

			/* Neither write nor execute access. */
			if (!OA_WRITE_OR_EXECUTE(optd, cr)) {
				opt->status = T_NOTSUPPORT;
				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
				continue;
			}

			/*
			 * All checks passed. Make room in the ack; the size
			 * in the table does not include the header length.
			 */
			opt->status = T_SUCCESS;
			*toa_lenp += sizeof (struct T_opthdr) +
			    _TPI_ALIGN_TOPT(optd->opdes_size);
		} else {
			return (TBADFLAG);
		}
	}

	return (0);
}

/*
 * Second pass over the option buffer of the request 'reqmp'.
 *
 * Each option is acted upon according to the "status" left in its header
 * by process_topthdrs_first_pass(), and the resulting options are laid
 * out in 'ack_mp' right after the T_optmgmt_ack header.  T_ALLOPT gets
 * "expanded" into all options of the given level by the per-operation
 * workers.  The worst status seen is accumulated in '*worst_statusp'.
 *
 * On success 'ack_mp->b_wptr' is set past the last option written and 0
 * is returned; otherwise TBADOPT or TBADFLAG is returned.
 */
static t_scalar_t
do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr,
    optdb_obj_t *dbobjp, t_uscalar_t *worst_statusp)
{
	struct T_optmgmt_req *tor = (struct T_optmgmt_req *)reqmp->b_rptr;
	struct T_opthdr *opt_start;
	struct T_opthdr *opt_end;
	struct T_opthdr *opt;
	uchar_t *outp;
	int pass_through;

	/* Results start after the ack header (assumed int32_t aligned). */
	outp = (uchar_t *)ack_mp->b_rptr + sizeof (struct T_optmgmt_ack);

	opt_start = (struct T_opthdr *)mi_offset_param(reqmp,
	    tor->OPT_offset, tor->OPT_length);
	if (opt_start == NULL)
		return (TBADOPT);
	opt_end = (struct T_opthdr *)((uchar_t *)opt_start + tor->OPT_length);
	ASSERT(__TPI_TOPT_ISALIGNED(opt_start)); /* verified in first pass */

	for (opt = opt_start; opt && (opt < opt_end);
	    opt = _TPI_TOPT_NEXTHDR(opt_start, tor->OPT_length, opt)) {

		/* verified in first pass */
		ASSERT(_TPI_TOPT_VALID(opt, opt_start, opt_end));

		/*
		 * Decide whether the first pass already settled this option
		 * as a failure for the MGMT_flags semantics. T_READONLY is,
		 * for all practical purposes, a "success" for
		 * T_DEFAULT/T_CURRENT and a "failure" for
		 * T_CHECK/T_NEGOTIATE.
		 */
		if (opt->status == T_NOTSUPPORT || opt->status == T_FAILURE) {
			pass_through = 1;
		} else if (opt->status == T_READONLY) {
			pass_through =
			    (tor->MGMT_flags & (T_NEGOTIATE|T_CHECK)) != 0;
		} else {
			pass_through = 0;
		}

		if (pass_through) {
			/*
			 * For T_CHECK/T_NEGOTIATE the semantics on
			 * T_NOTSUPPORT/T_FAILURE/T_READONLY are to return
			 * the "value" part untouched, so the option header
			 * and any value are copied to the output as is.
			 *
			 * For T_DEFAULT/T_CURRENT input values are to be
			 * ignored. The specification is not clear, but we
			 * interpret that they may still be returned as is,
			 * so they are treated the same way. XXX If that
			 * interpretation is ever determined incorrect,
			 * treat T_DEFAULT/T_CURRENT differently here.
			 */
			bcopy(opt, outp, opt->len);
			outp += _TPI_ALIGN_TOPT(opt->len);

			*worst_statusp = get_worst_status(opt->status,
			    *worst_statusp);
			continue;
		}

		/* Status is T_SUCCESS or T_READONLY: process the value. */
		ASSERT(opt->status == T_SUCCESS || opt->status == T_READONLY);

		if (tor->MGMT_flags == T_DEFAULT) {
			/*
			 * The default comes from the table or a protocol
			 * specific function. If that fails, mark the option
			 * failed and pass the input through.
			 */
			if (do_opt_default(q, opt, &outp, worst_statusp,
			    cr, dbobjp) < 0) {
				opt->status = T_FAILURE;
				bcopy(opt, outp, opt->len);
				outp += _TPI_ALIGN_TOPT(opt->len);
				*worst_statusp = get_worst_status(opt->status,
				    *worst_statusp);
			}
		} else if (tor->MGMT_flags == T_CURRENT) {
			do_opt_current(q, opt, &outp, worst_statusp, cr,
			    dbobjp);
		} else if (tor->MGMT_flags == T_CHECK) {
			do_opt_check_or_negotiate(q, opt,
			    SETFN_OPTCOM_CHECKONLY, &outp, worst_statusp,
			    cr, dbobjp);
		} else if (tor->MGMT_flags == T_NEGOTIATE) {
			do_opt_check_or_negotiate(q, opt,
			    SETFN_OPTCOM_NEGOTIATE, &outp, worst_statusp,
			    cr, dbobjp);
		} else {
			return (TBADFLAG);
		}
	}

	/* Everything up to 'outp' is now valid reply data. */
	ack_mp->b_wptr = outp;
	ASSERT(ack_mp->b_wptr <= ack_mp->b_datap->db_lim);

	return (0);
}


/*
 * Return the "worst" among the arguments "status" and
 * "current_worst_status".
 *
 * The pecking order, from best to worst, is the order of the entries in
 * the 'severity' table below.  'status' replaces the current worst only
 * if both values are in the table and 'status' ranks strictly worse; in
 * every other case 'current_worst_status' is returned unchanged.
 */
static t_uscalar_t
get_worst_status(t_uscalar_t status, t_uscalar_t current_worst_status)
{
	static const t_uscalar_t severity[] = {
		T_SUCCESS,		/* best */
		T_PARTSUCCESS,
		T_FAILURE,
		T_READONLY,
		T_NOTSUPPORT		/* worst */
	};
	const int nseverity = (int)(sizeof (severity) / sizeof (severity[0]));
	int worst_rank = -1;	/* rank of current_worst_status, if known */
	int status_rank = -1;	/* rank of status, if known */
	int i;

	for (i = 0; i < nseverity; i++) {
		if (severity[i] == current_worst_status)
			worst_rank = i;
		if (severity[i] == status)
			status_rank = i;
	}

	/*
	 * An unrecognized current value is never replaced, and an
	 * unrecognized or not-worse 'status' never replaces it.
	 */
	if (worst_rank < 0 || status_rank <= worst_rank)
		return (current_worst_status);

	return (status);
}

/*
 * Build the T_DEFAULT reply for one requested option header.
 *
 * q		- queue handed through to the protocol's default function
 * reqopt	- the option header from the request
 * resptrp	- in/out cursor into the reply buffer; advanced past
 *		  everything written here
 * worst_statusp - in/out running "worst" status of the whole reply
 * cr		- credentials used for the T_ALLOPT permission checks
 * dbobjp	- option database (descriptor table and default function)
 *
 * For a single named option the default comes from the protocol's
 * default function (OP_DEF_FN) or from the descriptor table.
 * For T_ALLOPT every readable option of the requested level is expanded.
 *
 * Returns 0 on success. Returns -1, with *resptrp unchanged, when the
 * default function fails for a single named option; the caller then
 * passes the input option through.
 */
static int
do_opt_default(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
    t_uscalar_t *worst_statusp, cred_t *cr, optdb_obj_t *dbobjp)
{
	pfi_t	deffn = dbobjp->odb_deffn;
	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;

	struct T_opthdr *topth;
	opdes_t *optd;

	if (reqopt->name != T_ALLOPT) {
		/*
		 * lookup the option in the table and fill default value
		 */
		optd = proto_opt_lookup(reqopt->level, reqopt->name,
		    opt_arr, opt_arr_cnt);

		/* Calling routine should have verified that it exists */
		ASSERT(optd != NULL);

		topth = (struct T_opthdr *)(*resptrp);
		topth->level = reqopt->level;
		topth->name = reqopt->name;
		topth->status = reqopt->status;

		*worst_statusp = get_worst_status(reqopt->status,
		    *worst_statusp);

		if (optd->opdes_props & OP_NODEFAULT) {
			/* header only, no default "value" part */
			topth->len = sizeof (struct T_opthdr);
			*resptrp += sizeof (struct T_opthdr);
		} else {
			int deflen;

			if (optd->opdes_props & OP_DEF_FN) {
				deflen = (*deffn)(q, reqopt->level,
				    reqopt->name, _TPI_TOPT_DATA(topth));
				if (deflen >= 0) {
					topth->len = (t_uscalar_t)
					    (sizeof (struct T_opthdr) + deflen);
				} else {
					/*
					 * return error, this should 'pass
					 * through' the option and maybe some
					 * other level will fill it in or
					 * already did.
					 * (No change in 'resptrp' upto here)
					 */
					return (-1);
				}
			} else {
				/* fill length and value part */
				switch (optd->opdes_size) {
				/*
				 * Since options are guaranteed aligned only
				 * on a 4 byte boundary (t_scalar_t) any
				 * option that is greater in size will default
				 * to the bcopy below
				 */
				case sizeof (int32_t):
					*(int32_t *)_TPI_TOPT_DATA(topth) =
					    (int32_t)optd->opdes_default;
					break;
				case sizeof (int16_t):
					*(int16_t *)_TPI_TOPT_DATA(topth) =
					    (int16_t)optd->opdes_default;
					break;
				case sizeof (int8_t):
					*(int8_t *)_TPI_TOPT_DATA(topth) =
					    (int8_t)optd->opdes_default;
					break;
				default:
					/*
					 * other length but still assume
					 * fixed - use bcopy
					 */
					bcopy(optd->opdes_defbuf,
					    _TPI_TOPT_DATA(topth),
					    optd->opdes_size);
					break;
				}
				topth->len = (t_uscalar_t)(optd->opdes_size +
				    sizeof (struct T_opthdr));
			}
			*resptrp += _TPI_ALIGN_TOPT(topth->len);
		}
		return (0);	/* OK return */
	}

	/*
	 * T_ALLOPT processing
	 *
	 * lookup and stuff default values of all the options of the
	 * level specified
	 */
	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
		if (reqopt->level != optd->opdes_level)
			continue;
		/*
		 *
		 * T_DEFAULT semantics:
		 * XXX: we interpret T_DEFAULT semantics such that access to
		 * read value is required for access even the default value.
		 * Else option is ignored for T_ALLOPT request.
		 */
		if (!OA_READ_PERMISSION(optd, cr))
			/* skip this one */
			continue;

		/*
		 * Found option of same level as T_ALLOPT request
		 * that we can return.
		 */

		topth = (struct T_opthdr *)(*resptrp);
		topth->level = optd->opdes_level;
		topth->name = optd->opdes_name;

		/*
		 * T_DEFAULT semantics:
		 * We know that read access is set. If no other access is set,
		 * then status is T_READONLY
		 */
		if (OA_READONLY_PERMISSION(optd, cr)) {
			topth->status = T_READONLY;
			*worst_statusp = get_worst_status(T_READONLY,
			    *worst_statusp);
		} else {
			topth->status = T_SUCCESS;
			/*
			 * Note: *worst_statusp has to be T_SUCCESS or
			 * worse so no need to adjust
			 */
		}

		if (optd->opdes_props & OP_NODEFAULT) {
			/* header only, no value part */
			topth->len = sizeof (struct T_opthdr);
			*resptrp += sizeof (struct T_opthdr);
		} else {
			int deflen;

			if (optd->opdes_props & OP_DEF_FN) {
				/*
				 * Ask for the default of the option being
				 * expanded. reqopt->name is T_ALLOPT on this
				 * path, so the name must come from the table
				 * entry, not from the request.
				 */
				deflen = (*deffn)(q, reqopt->level,
				    optd->opdes_name, _TPI_TOPT_DATA(topth));
				if (deflen >= 0) {
					topth->len = (t_uscalar_t)(deflen +
					    sizeof (struct T_opthdr));
				} else {
					/*
					 * deffn failed.
					 * return just the header as T_ALLOPT
					 * expansion.
					 * Some other level deffn may
					 * supply value part.
					 */
					topth->len = sizeof (struct T_opthdr);
					topth->status = T_FAILURE;
					*worst_statusp =
					    get_worst_status(T_FAILURE,
					    *worst_statusp);
				}
			} else {
				/*
				 * fill length and value part from
				 * table
				 */
				switch (optd->opdes_size) {
				/*
				 * Since options are guaranteed aligned only
				 * on a 4 byte boundary (t_scalar_t) any
				 * option that is greater in size will default
				 * to the bcopy below
				 */
				case sizeof (int32_t):
					*(int32_t *)_TPI_TOPT_DATA(topth) =
					    (int32_t)optd->opdes_default;
					break;
				case sizeof (int16_t):
					*(int16_t *)_TPI_TOPT_DATA(topth) =
					    (int16_t)optd->opdes_default;
					break;
				case sizeof (int8_t):
					*(int8_t *)_TPI_TOPT_DATA(topth) =
					    (int8_t)optd->opdes_default;
					break;
				default:
					/*
					 * other length but still assume
					 * fixed - use bcopy
					 */
					bcopy(optd->opdes_defbuf,
					    _TPI_TOPT_DATA(topth),
					    optd->opdes_size);
					break;
				}
				topth->len = (t_uscalar_t)(optd->opdes_size +
				    sizeof (struct T_opthdr));
			}
			*resptrp += _TPI_ALIGN_TOPT(topth->len);
		}
	}
	return (0);
}

/*
 * Build the T_CURRENT reply for one requested option header.
 *
 * The protocol's get function is asked for the current value, which it
 * writes directly after the reply header. For T_ALLOPT every readable
 * option of the requested level is expanded. If nothing at all was
 * written to the reply, the request option is copied through with a
 * status of T_FAILURE; maybe something downstream will fill it in or
 * something upstream did.
 */
static void
do_opt_current(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
    t_uscalar_t *worst_statusp, cred_t *cr, optdb_obj_t *dbobjp)
{
	pfi_t	getfn = dbobjp->odb_getfn;
	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
	uint_t	opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
	uchar_t	*startp = *resptrp;
	struct T_opthdr *hdr;
	uchar_t	*valp;
	int	vallen;
	uint_t	idx;

	if (reqopt->name == T_ALLOPT) {
		/* expand to every readable option of the requested level */
		for (idx = 0; idx < opt_arr_cnt; idx++) {
			opdes_t *optd = &opt_arr[idx];

			if (reqopt->level != optd->opdes_level ||
			    !OA_READ_PERMISSION(optd, cr)) {
				/* other level, or not readable: skip */
				continue;
			}

			hdr = (struct T_opthdr *)*resptrp;
			valp = *resptrp + sizeof (struct T_opthdr);

			vallen = (*getfn)(q, reqopt->level, optd->opdes_name,
			    valp);

			hdr->level = reqopt->level;
			hdr->name = optd->opdes_name;
			if (vallen < 0) {
				/*
				 * Failed: emit a bare header marked
				 * T_FAILURE. Maybe something downstream will
				 * handle this one and fill in a value. Here
				 * it is just part of T_ALLOPT expansion.
				 */
				hdr->len = sizeof (struct T_opthdr);
				hdr->status = T_FAILURE;
				*resptrp = valp;
			} else {
				hdr->len = (t_uscalar_t)(vallen +
				    sizeof (struct T_opthdr));
				if (OA_READONLY_PERMISSION(optd, cr))
					hdr->status = T_READONLY;
				else
					hdr->status = T_SUCCESS;
				*resptrp = valp + _TPI_ALIGN_TOPT(vallen);
			}
			*worst_statusp = get_worst_status(hdr->status,
			    *worst_statusp);
		}
	} else {
		/* single named option */
		hdr = (struct T_opthdr *)*resptrp;
		valp = *resptrp + sizeof (struct T_opthdr);

		vallen = (*getfn)(q, reqopt->level, reqopt->name, valp);
		if (vallen >= 0) {
			hdr->len = (t_uscalar_t)(vallen +
			    sizeof (struct T_opthdr));
			hdr->level = reqopt->level;
			hdr->name = reqopt->name;
			hdr->status = reqopt->status;
			*resptrp = valp + _TPI_ALIGN_TOPT(vallen);
			*worst_statusp = get_worst_status(hdr->status,
			    *worst_statusp);
		}
		/* on failure "*resptrp" is left where it started */
	}

	if (*resptrp != startp)
		return;

	/*
	 * Nothing was produced: getfn failed and does not want to handle
	 * this option, so pass the request through marked as failed.
	 */
	reqopt->status = T_FAILURE;
	bcopy(reqopt, *resptrp, reqopt->len);
	*resptrp += _TPI_ALIGN_TOPT(reqopt->len);
	*worst_statusp = get_worst_status(reqopt->status, *worst_statusp);
}

/*
 * Build the T_CHECK / T_NEGOTIATE reply for one requested option header.
 *
 * optset_context is passed to the protocol's set function for a single
 * named option. For T_ALLOPT (asserted to be T_NEGOTIATE only) every
 * option of the requested level is walked and an attempt is made to set
 * it to its default value.
 *
 * resptrp is an in/out cursor into the reply buffer and worst_statusp is
 * the in/out running "worst" status. If nothing at all was written to the
 * reply, the request option is copied through with status T_FAILURE.
 */
static void
do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
    uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
    cred_t *cr, optdb_obj_t *dbobjp)
{
	pfi_t	deffn = dbobjp->odb_deffn;
	opt_set_fn setfn = dbobjp->odb_setfn;
	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
	struct T_opthdr *topth;
	opdes_t *optd;
	int error;
	t_uscalar_t optlen;
	t_scalar_t optsize;
	/* remembered so we can tell below whether anything was written */
	uchar_t *initptr = *resptrp;

	ASSERT(reqopt->status == T_SUCCESS);

	if (reqopt->name != T_ALLOPT) {
		/*
		 * Single named option: reserve room for the reply header,
		 * then let setfn write the resulting value right after it.
		 */
		topth = (struct T_opthdr *)*resptrp;
		*resptrp += sizeof (struct T_opthdr);
		error = (*setfn)(q, optset_context, reqopt->level, reqopt->name,
		    reqopt->len - sizeof (struct T_opthdr),
		    _TPI_TOPT_DATA(reqopt), &optlen, _TPI_TOPT_DATA(topth),
		    NULL, cr);
		if (error) {
			/* failed - reset "*resptrp" */
			*resptrp -= sizeof (struct T_opthdr);
		} else {
			/*
			 * success - "value" already filled in setfn()
			 */
			topth->len = (t_uscalar_t)(optlen +
			    sizeof (struct T_opthdr));
			topth->level = reqopt->level;
			topth->name = reqopt->name;
			topth->status = reqopt->status;
			*resptrp += _TPI_ALIGN_TOPT(optlen);
			*worst_statusp = get_worst_status(topth->status,
			    *worst_statusp);
		}
	} else {		/* T_ALLOPT processing */
		/* only for T_NEGOTIATE case */
		ASSERT(optset_context == SETFN_OPTCOM_NEGOTIATE);

		/* scan and set all options to default value */
		for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {

			/* skip other levels */
			if (reqopt->level != optd->opdes_level)
				continue;

			if (OA_EXECUTE_PERMISSION(optd, cr) ||
			    OA_NO_PERMISSION(optd, cr)) {
				/*
				 * skip this one too. Does not make sense to
				 * set anything to default value for "execute"
				 * options.
				 */
				continue;
			}

			if (OA_READONLY_PERMISSION(optd, cr)) {
				/*
				 * Return with T_READONLY status (and no value
				 * part). Note: spec is not clear but
				 * XTI test suite needs this.
				 */
				topth = (struct T_opthdr *)*resptrp;
				topth->len = sizeof (struct T_opthdr);
				*resptrp += topth->len;
				topth->level = reqopt->level;
				topth->name = optd->opdes_name;
				topth->status = T_READONLY;
				*worst_statusp = get_worst_status(topth->status,
				    *worst_statusp);
				continue;
			}

			/*
			 * It is not read only or execute type
			 * then it must have write permission
			 */
			ASSERT(OA_WRITE_PERMISSION(optd, cr));

			/*
			 * Emit a header-only entry first; the length is
			 * grown below only if setfn succeeds and supplies a
			 * value part.
			 */
			topth = (struct T_opthdr *)*resptrp;
			*resptrp += sizeof (struct T_opthdr);

			topth->len = sizeof (struct T_opthdr);
			topth->level = reqopt->level;
			topth->name = optd->opdes_name;
			if (optd->opdes_props & OP_NODEFAULT) {
				/*
				 * Option of "no default value" so it does not
				 * make sense to try to set it. We just return
				 * header with status of T_SUCCESS
				 * XXX should this be failure ?
				 */
				topth->status = T_SUCCESS;
				continue; /* skip setting */
			}
			if (optd->opdes_props & OP_DEF_FN) {
				/*
				 * Variable length options are not attempted.
				 * Otherwise deffn is handed the table entry's
				 * opdes_defbuf as its output buffer and
				 * returns the default's size (negative on
				 * failure).
				 */
				if ((optd->opdes_props & OP_VARLEN) ||
				    ((optsize = (*deffn)(q, reqopt->level,
				    optd->opdes_name,
				    (uchar_t *)optd->opdes_defbuf)) < 0)) {
					/* XXX - skip these too */
					topth->status = T_SUCCESS;
					continue; /* skip setting */
				}
			} else {
				optsize = optd->opdes_size;
			}


			/* set option of this level */
			error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE,
			    reqopt->level, optd->opdes_name, optsize,
			    (uchar_t *)optd->opdes_defbuf, &optlen,
			    _TPI_TOPT_DATA(topth), NULL, cr);
			if (error) {
				/*
				 * failed, return as T_FAILURE and null value
				 * part. Maybe something downstream will
				 * handle this one and fill in a value. Here
				 * it is just part of T_ALLOPT expansion.
				 */
				topth->status = T_FAILURE;
				*worst_statusp = get_worst_status(topth->status,
				    *worst_statusp);
			} else {
				/* success */
				topth->len += optlen;
				topth->status = T_SUCCESS;
				*resptrp += _TPI_ALIGN_TOPT(optlen);
			}
		} /* end for loop */
		/* END T_ALLOPT */
	}

	if (*resptrp == initptr) {
		/*
		 * setfn failed and does not want to handle this option.
		 * Nothing was written, so pass the request option through
		 * marked as T_FAILURE.
		 */
		reqopt->status = T_FAILURE;
		bcopy(reqopt, *resptrp, reqopt->len);
		*resptrp += _TPI_ALIGN_TOPT(reqopt->len);
		*worst_statusp = get_worst_status(reqopt->status,
		    *worst_statusp);
	}
}

/*
 * The following routines process options buffer passed with
 * T_CONN_REQ, T_CONN_RES and T_UNITDATA_REQ.
 * This routine does the consistency check applied to the
 * sanity of formatting of multiple options packed in the
 * buffer.
 *
 * XTI brain damage alert:
 * XTI interface adopts the notion of an option being an
 * "absolute requirement" from OSI transport service (but applies
 * it to all transports including Internet transports).
 * The main effect of that is action on failure to "negotiate" a
 * requested option to the exact requested value
 *
 *          - if the option is an "absolute requirement", the primitive
 *            is aborted (e.g T_DISCON_REQ or T_UDERR generated)
 *          - if the option is NOT an "absolute requirement" it can
 *            just be ignored.
 *
 * We would not support "negotiating" of options on connection
 * primitives for Internet transports. However, just in case we are
 * forced to in order to pass strange test suites, the design here
 * tries to support these notions.
 *
 * tpi_optcom_buf(q, mp, opt_lenp, opt_offset, cred, dbobjp, thisdg_attrs,
 *	*is_absreq_failurep)
 *
 * - Verify the option buffer, if formatted badly, return error 1
 *
 * - If it is a "permissions" failure (read-only), return error 2
 *
 * - Else, process the option "in place", the following can happen,
 *	     - if a "privileged" option, mark it as "ignored".
 *	     - if "not supported", mark "ignored"
 *	     - if "supported" attempt negotiation and fill result in
 *	       the outcome
 *			- if "absolute requirement", set "*is_absreq_failurep"
 *			- if NOT an "absolute requirement", then our
 *			  interpretation is to mark it as ignored if
 *			  negotiation fails (Spec allows partial success
 *			  as in OSI protocols but not failure)
 *
 *   Then delete "ignored" options from option buffer and return success.
 *
 */
/*
 * Process, in place, the option buffer carried by a T_CONN_REQ,
 * T_CONN_RES or T_UNITDATA_REQ message.
 *
 * q		- queue handed through to the protocol's set function
 * mp		- the TPI message holding the option buffer
 * opt_lenp	- in/out length of the option buffer (points into mp);
 *		  on success updated to the length of the surviving options
 * opt_offset	- offset of the option buffer from mp->b_rptr
 * cr		- credentials used for permission checks
 * dbobjp	- option database (descriptor table and set function)
 * thisdg_attrs	- opaque argument handed through to the set function
 * is_absreq_failurep - set to 1 when an "absolute requirement" option
 *		  failed, otherwise 0
 *
 * Returns 0 on success, otherwise an errno value: EINVAL (unexpected
 * primitive, bad option length or oversized result), ENOPROTOOPT
 * (malformed buffer), ENOMEM (no scratch copy), EACCES (permission
 * failure), or the set function's error for an absolute requirement.
 * Options that fail but are not absolute requirements are dropped from
 * the buffer and do not cause a failure return.
 */
int
tpi_optcom_buf(queue_t *q, mblk_t *mp, t_scalar_t *opt_lenp,
    t_scalar_t opt_offset, cred_t *cr, optdb_obj_t *dbobjp,
    void *thisdg_attrs, int *is_absreq_failurep)
{
	opt_set_fn setfn = dbobjp->odb_setfn;
	opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
	struct T_opthdr *opt, *opt_start, *opt_end;
	mblk_t  *copy_mp_head;
	uchar_t *optr, *init_optr;
	opdes_t *optd;
	uint_t optset_context;
	t_uscalar_t olen;
	int error = 0;

	ASSERT((uchar_t *)opt_lenp > mp->b_rptr &&
	    (uchar_t *)opt_lenp < mp->b_wptr);

	copy_mp_head = NULL;
	*is_absreq_failurep = 0;
	switch (((union T_primitives *)mp->b_rptr)->type) {
	case T_CONN_REQ:
	case T_CONN_RES:
		optset_context = SETFN_CONN_NEGOTIATE;
		break;
	case T_UNITDATA_REQ:
		optset_context = SETFN_UD_NEGOTIATE;
		break;
	default:
		/*
		 * should never get here, all possible TPI primitives
		 * where this can be called from should be accounted
		 * for in the cases above
		 */
		return (EINVAL);
	}

	if ((opt_start = (struct T_opthdr *)
	    mi_offset_param(mp, opt_offset, *opt_lenp)) == NULL) {
		error = ENOPROTOOPT;
		goto error_ret;
	}
	if (!__TPI_TOPT_ISALIGNED(opt_start)) {
		error = ENOPROTOOPT;
		goto error_ret;
	}

	opt_end = (struct T_opthdr *)((uchar_t *)opt_start
	    + *opt_lenp);

	/*
	 * Surviving options are accumulated in a scratch copy of the
	 * message and copied back over the original once the scan is done.
	 */
	if ((copy_mp_head = copyb(mp)) == (mblk_t *)NULL) {
		error = ENOMEM;
		goto error_ret;
	}

	init_optr = optr = (uchar_t *)&copy_mp_head->b_rptr[opt_offset];

	for (opt = opt_start; opt && (opt < opt_end);
	    opt = _TPI_TOPT_NEXTHDR(opt_start, *opt_lenp, opt)) {
		/*
		 * Validate the option for length and alignment
		 * before accessing anything in it
		 */
		if (!_TPI_TOPT_VALID(opt, opt_start, opt_end)) {
			error = ENOPROTOOPT;
			goto error_ret;
		}

		/* Find the option in the opt_arr. */
		optd = proto_opt_lookup(opt->level, opt->name,
		    opt_arr, opt_arr_cnt);

		if (optd == NULL) {
			/*
			 * Option not found
			 */
			opt->status = T_NOTSUPPORT;
			continue;
		}

		/*
		 * Weird but as in XTI spec.
		 * Sec 6.3.6 "Privileged and ReadOnly Options"
		 * Permission problems (e.g.readonly) fail with bad access
		 * BUT "privileged" option request from those NOT PRIVILEGED
		 * are to be merely "ignored".
		 * XXX Prevents "probing" of privileged options ?
		 */
		if (OA_READONLY_PERMISSION(optd, cr)) {
			error = EACCES;
			goto error_ret;
		}
		if (OA_MATCHED_PRIV(optd, cr)) {
			/*
			 * For privileged options, we DO perform
			 * access checks as is common sense
			 */
			if (!OA_WX_ANYPRIV(optd)) {
				error = EACCES;
				goto error_ret;
			}
		} else {
			/*
			 * For non privileged, we fail instead following
			 * "ignore" semantics dictated by XTI spec for
			 * permissions problems.
			 * Sec 6.3.6 "Privileged and ReadOnly Options"
			 * XXX Should we do "ignore" semantics ?
			 */
			if (!OA_WX_NOPRIV(optd)) { /* nopriv */
				opt->status = T_FAILURE;
				continue;
			}
		}
		/*
		 *
		 * If the negotiation fails, for options that
		 * are "absolute requirement", it is a fatal error.
		 * For options that are NOT "absolute requirements",
		 * and the value fails to negotiate, the XTI spec
		 * only considers the possibility of partial success
		 * (T_PARTSUCCESS - not likely for Internet protocols).
		 * The spec is in denial about complete failure
		 * (T_FAILURE) to negotiate for options that are
		 * carried on T_CONN_REQ/T_CONN_RES/T_UNITDATA
		 * We interpret the T_FAILURE to negotiate an option
		 * that is NOT an absolute requirement that it is safe
		 * to ignore it.
		 */

		/* verify length */
		if (opt->len < (t_uscalar_t)sizeof (struct T_opthdr) ||
		    !opt_length_ok(optd, opt->len - sizeof (struct T_opthdr))) {
			/* bad size */
			if ((optd->opdes_props & OP_NOT_ABSREQ) == 0) {
				/* option is absolute requirement */
				*is_absreq_failurep = 1;
				error = EINVAL;
				goto error_ret;
			}
			opt->status = T_FAILURE;
			continue;
		}

		/*
		 * verified generic attributes. Now call set function.
		 * Note: We assume the following to simplify code.
		 * XXX If this is found not to be valid, this routine
		 * will need to be rewritten. At this point it would
		 * be premature to introduce more complexity than is
		 * needed.
		 * Assumption: For variable length options, we assume
		 * that the value returned will be same or less length
		 * (size does not increase). This makes it OK to pass the
		 * same space for output as it is on input.
		 */

		/*
		 * Give olen a defined value so the size check below does
		 * not read an indeterminate value if setfn fails without
		 * storing an output length.
		 */
		olen = 0;
		error = (*setfn)(q, optset_context, opt->level, opt->name,
		    opt->len - (t_uscalar_t)sizeof (struct T_opthdr),
		    _TPI_TOPT_DATA(opt), &olen, _TPI_TOPT_DATA(opt),
		    thisdg_attrs, cr);

		if (olen > (int)(opt->len - sizeof (struct T_opthdr))) {
			/*
			 * Space on output more than space on input. Should
			 * not happen and we consider it a bug/error.
			 * More of a restriction than an error in our
			 * implementation. Will see if we can live with this
			 * otherwise code will get more hairy with multiple
			 * passes.
			 */
			error = EINVAL;
			goto error_ret;
		}
		if (error != 0) {
			if ((optd->opdes_props & OP_NOT_ABSREQ) == 0) {
				/* option is absolute requirement. */
				*is_absreq_failurep = 1;
				goto error_ret;
			}
			/*
			 * failed - but option "not an absolute
			 * requirement". The option is ignored, so the
			 * failure must not leak into the return value when
			 * no later setfn call overwrites "error".
			 */
			opt->status = T_FAILURE;
			error = 0;
			continue;
		}
		/*
		 * Fill in the only possible successful result
		 * (Note: TPI allows for T_PARTSUCCESS - partial
		 * success result code which is relevant in OSI world
		 * and not possible in Internet code)
		 */
		opt->status = T_SUCCESS;

		/*
		 * Add T_SUCCESS result code options to the "output" options.
		 * No T_FAILURES or T_NOTSUPPORT here as they are to be
		 * ignored.
		 * This code assumes output option buffer will
		 * be <= input option buffer.
		 *
		 * Copy option header+value
		 */
		bcopy(opt, optr, opt->len);
		optr +=  _TPI_ALIGN_TOPT(opt->len);
	}
	/*
	 * Overwrite the input mblk option buffer now with the output
	 * and update length, and contents in original mblk
	 * (offset remains unchanged).
	 */
	*opt_lenp = (t_scalar_t)(optr - init_optr);
	if (*opt_lenp > 0) {
		bcopy(init_optr, opt_start, *opt_lenp);
	}

error_ret:
	if (copy_mp_head != NULL)
		freeb(copy_mp_head);
	return (error);
}

/*
 * Report whether "level" appears among the first valid_level_arr_cnt
 * entries of valid_level_arr.
 */
static boolean_t
opt_level_valid(t_uscalar_t level, optlevel_t *valid_level_arr,
    uint_t valid_level_arr_cnt)
{
	uint_t	idx;

	for (idx = 0; idx < valid_level_arr_cnt; idx++) {
		if (level == (uint_t)valid_level_arr[idx])
			return (B_TRUE);
	}

	return (B_FALSE);
}


/*
 * Compute largest possible size for an option buffer containing
 * all options in one buffer.
 *
 * XXX TBD, investigate use of opt_bloated_maxsize() to avoid
 *     wastefully large buffer allocation.
 */
static size_t
opt_level_allopts_lengths(t_uscalar_t level, opdes_t *opt_arr,
    uint_t opt_arr_cnt)
{
	size_t	total = 0;	/* stays 0 if the level has no options */
	uint_t	idx;

	/*
	 * Sum, over every table entry of the requested level, the room
	 * needed for an option header plus the aligned value size.
	 * No filtering for permissions etc. is done, so the result is
	 * an upper bound on the real aggregate length required.
	 */
	for (idx = 0; idx < opt_arr_cnt; idx++) {
		opdes_t *optd = &opt_arr[idx];

		if (level != optd->opdes_level)
			continue;

		total += sizeof (struct T_opthdr) +
		    _TPI_ALIGN_TOPT(optd->opdes_size);
	}

	return (total);		/* 0 implies level not found */
}

/*
 * Compute largest possible size for an option buffer containing
 * all options in one buffer - a (theoretical?) worst case scenario
 * for certain cases.
 */
t_uscalar_t
optcom_max_optbuf_len(opdes_t *opt_arr, uint_t opt_arr_cnt)
{
	/* start from the size of a T_info_ack, as the table walk adds on */
	t_uscalar_t	total = sizeof (struct T_info_ack);
	uint_t		idx;

	/* every table entry contributes a header plus its aligned size */
	for (idx = 0; idx < opt_arr_cnt; idx++) {
		total += (t_uscalar_t)sizeof (struct T_opthdr) +
		    (t_uscalar_t)_TPI_ALIGN_TOPT(opt_arr[idx].opdes_size);
	}

	return (total);
}

/*
 * Compute largest possible size for OPT_size for a transport.
 * Heuristic used is to add all but certain extremely large
 * size options; this is done by calling opt_bloated_maxsize().
 * It affects user level allocations in TLI/XTI code using t_alloc()
 * and other TLI/XTI implementation instance structures.
 * The large size options excluded are presumed to be
 * never accessed through the (theoretical?) worst case code paths
 * through TLI/XTI as they are currently IPv6 specific options.
 */

t_uscalar_t
optcom_max_optsize(opdes_t *opt_arr, uint_t opt_arr_cnt)
{
	/* start from the size of a T_info_ack, as the table walk adds on */
	t_uscalar_t	total = sizeof (struct T_info_ack);
	uint_t		idx;

	for (idx = 0; idx < opt_arr_cnt; idx++) {
		opdes_t *optd = &opt_arr[idx];

		/* leave the "bloated" options out of the sum */
		if (opt_bloated_maxsize(optd))
			continue;

		total += (t_uscalar_t)sizeof (struct T_opthdr) +
		    (t_uscalar_t)_TPI_ALIGN_TOPT(optd->opdes_size);
	}

	return (total);
}

/*
 * The theoretical model used in optcom_max_optsize() and
 * opt_level_allopts_lengths() accounts for the worst case of all
 * possible options for the theoretical cases and results in wasteful
 * memory allocations for certain theoretically correct usage scenarios.
 * In practice, the "features" they support are rarely, if ever,
 * used and even then only by test suites for those features (VSU, VST).
 * However, they result in large allocations due to the increased transport
 * T_INFO_ACK OPT_size field affecting t_alloc() users and TLI/XTI library
 * instance data structures for applications.
 *
 * The following routine opt_bloated_maxsize() supports a hack that avoids
 * paying the tax for the bloated options by excluding them and pretending
 * they don't exist for certain features without affecting features that
 * do use them.
 *
 * XXX Currently implemented only for optcom_max_optsize()
 *     (to reduce risk late in release).
 *     TBD for future, investigate use in optcom_level_allopts_lengths() and
 *     all the instances of T_ALLOPT processing to exclude "bloated options".
 *     Will not affect VSU/VST tests as they do not test with IPPROTO_IPV6
 *     level options which are the only ones that fit the "bloated maxsize"
 *     option profile now.
 */
/*
 * Returns B_TRUE when the descriptor is one of the IPPROTO_IPV6 level
 * options listed below, B_FALSE for everything else.
 */
static boolean_t
opt_bloated_maxsize(opdes_t *optd)
{
	boolean_t bloated = B_FALSE;

	if (optd->opdes_level == IPPROTO_IPV6) {
		switch (optd->opdes_name) {
		case IPV6_HOPOPTS:
		case IPV6_DSTOPTS:
		case IPV6_RTHDRDSTOPTS:
		case IPV6_RTHDR:
		case IPV6_PATHMTU:
			bloated = B_TRUE;
			break;
		default:
			break;
		}
	}

	return (bloated);
}

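/*
 * optlen is the length of the option content (the value part, with the
 * header excluded). The caller should check that the full option length
 * is at least sizeof (struct T_opthdr) before subtracting the header
 * size to compute optlen.
 */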
static boolean_t
opt_length_ok(opdes_t *optd, t_uscalar_t optlen)
{
	/*
	 * A variable length option may be anything up to and including
	 * the size in the descriptor; a fixed length option has to match
	 * the descriptor size exactly.
	 */
	if (optd->opdes_props & OP_VARLEN)
		return ((optlen <= optd->opdes_size) ? B_TRUE : B_FALSE);

	return ((optlen == optd->opdes_size) ? B_TRUE : B_FALSE);
}

/*
 * This routine manages the allocation and free of the space for
 * an extension header or option. Returns failure if memory
 * can not be allocated.
 */
int
optcom_pkt_set(uchar_t *invalp, uint_t inlen,
    uchar_t **optbufp, uint_t *optlenp)
{
	uchar_t	*newbuf = NULL;

	/* Same length as before: reuse the existing buffer in place */
	if (*optlenp == inlen) {
		bcopy(invalp, *optbufp, inlen);
		return (0);
	}

	/*
	 * Get the replacement buffer first so that the old contents are
	 * still intact if the allocation fails. A zero length input
	 * leaves the caller with a NULL buffer.
	 */
	if (inlen != 0) {
		newbuf = kmem_alloc(inlen, KM_NOSLEEP);
		if (newbuf == NULL)
			return (ENOMEM);
	}

	/* Release the previous buffer, if there was one */
	if (*optlenp != 0)
		kmem_free(*optbufp, *optlenp);

	if (inlen != 0)
		bcopy(invalp, newbuf, inlen);

	*optlenp = inlen;
	*optbufp = newbuf;
	return (0);
}

/*
 * Walk the control messages in "control" (controllen bytes) and hand each
 * one to opt_set_fn with a SETFN_UD_NEGOTIATE context, after looking it up
 * in the option table and checking permissions.
 *
 * Returns EOPNOTSUPP if no control message was processed, EINVAL for an
 * unknown option or an oversized result, EACCES for a permission failure,
 * a positive error from opt_set_fn, or 0 otherwise.
 */
int
process_auxiliary_options(conn_t *connp, void *control, t_uscalar_t controllen,
    void *optbuf, optdb_obj_t *dbobjp, int (*opt_set_fn)(conn_t *,
    uint_t, int, int, uint_t, uchar_t *, uint_t *, uchar_t *, void *, cred_t *),
    cred_t *cr)
{
	opdes_t		*opt_arr = dbobjp->odb_opt_des_arr;
	uint_t		opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
	struct cmsghdr	*cmsg;
	opdes_t		*optd;
	t_uscalar_t	inlen;
	t_uscalar_t	outlen;
	int		error = EOPNOTSUPP;

	for (cmsg = (struct cmsghdr *)control;
	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
	    cmsg = CMSG_NEXT(cmsg)) {

		inlen = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);

		/* The option has to be present in the table */
		optd = proto_opt_lookup(cmsg->cmsg_level, cmsg->cmsg_type,
		    opt_arr, opt_arr_cnt);
		if (optd == NULL)
			return (EINVAL);

		if (OA_READONLY_PERMISSION(optd, cr))
			return (EACCES);

		if (OA_MATCHED_PRIV(optd, cr)) {
			/*
			 * For privileged options, we DO perform
			 * access checks as is common sense
			 */
			if (!OA_WX_ANYPRIV(optd))
				return (EACCES);
		} else if (!OA_WX_NOPRIV(optd)) {
			/*
			 * For non privileged, we fail instead following
			 * "ignore" semantics dictated by XTI spec for
			 * permissions problems.
			 */
			return (EACCES);
		}

		/* the content area serves as both input and output buffer */
		error = opt_set_fn(connp, SETFN_UD_NEGOTIATE, optd->opdes_level,
		    optd->opdes_name, inlen, (uchar_t *)CMSG_CONTENT(cmsg),
		    &outlen, (uchar_t *)CMSG_CONTENT(cmsg), optbuf, cr);
		if (error > 0)
			return (error);
		if (outlen > inlen)
			return (EINVAL);

		/*
		 * error can be -ve if the protocol wants to
		 * pass the option to IP. We do not pass auxiliary
		 * options to IP.
		 */
		error = 0;
	}

	return (error);
}