lib/snmp/snmplib.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The snmp library helps to prepare the PDUs and communicate with
 * the snmp agent on the SP side via the ds_snmp driver.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <thread.h>
#include <synch.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <libnvpair.h>
#include <sys/ds_snmp.h>

#include "libpiclsnmp.h"
#include "snmplib.h"
#include "asn1.h"
#include "pdu.h"
#include "debug.h"

#pragma init(libpiclsnmp_init)		/* need this in .init */

/*
 * Data from the MIB is fetched based on the hints about object
 * groups received from (possibly many threads in) the application.
 * However, the fetched data is kept in a common cache for use across
 * all threads, so even a GETBULK is issued only when absolutely
 * necessary.
 *
 * Note that locking is not fine grained (there's no locking per row)
 * since we don't expect too many MT consumers right away.
 *
 */
/* Held around accesses to mibcache and n_mibcache_rows. */
static mutex_t	mibcache_lock;
/* Array of per-row nvlists, indexed by row number; entries may be NULL. */
static nvlist_t	**mibcache = NULL;
/* Number of entries currently allocated in the mibcache array. */
static uint_t	n_mibcache_rows = 0;

/* Protects snmp_reqid; taken in snmp_get_reqid(). */
static mutex_t snmp_reqid_lock;
/* Next request id to hand out; starts at 1 and is post-incremented. */
static int snmp_reqid = 1;

#ifdef SNMP_DEBUG
uint_t snmp_nsends = 0;
uint_t snmp_sentbytes = 0;
uint_t snmp_nrecvs = 0;
uint_t snmp_rcvdbytes = 0;
#endif

#ifdef USE_SOCKETS
#define	SNMP_DEFAULT_PORT	161
#define	SNMP_MAX_RECV_PKTSZ	(64 * 1024)
#endif

/*
 * We need a reliably monotonic and stable source of time values to age
 * entries in the mibcache toward expiration.  The code originally used
 * gettimeofday(), but since that is subject to time-of-day changes made by
 * the administrator, the values it returns do not satisfy our needs.
 * Instead, we use gethrtime(), which is immune to time-of-day changes.
 * However, since gethrtime() returns a signed 64-bit value in units of
 * nanoseconds and we are using signed 32-bit timestamps, we always divide
 * the result by (HRTIME_SCALE * NANOSEC) to scale it down into units of 10
 * seconds.
 *
 * Note that the scaling factor means that the value of MAX_INCACHE_TIME
 * from snmplib.h should also be in units of 10 seconds.
 */
#define	GET_SCALED_HRTIME()	(int)(gethrtime() / (HRTIME_SCALE * NANOSEC))

/*
 * The mibcache code originally cached values for 300 seconds after fetching
 * data via SNMP.  Subsequent reads within that 300 second window would come
 * from the cache - which is quite a bit faster than an SNMP query - but the
 * first request that came in more than 300 seconds after the previous SNMP
 * query would trigger a new SNMP query.  This worked well as an
 * optimization for frequent queries, but when data was only queried less
 * frequently than every 300 seconds (as proved to be the case at multiple
 * customer sites), the cache didn't help at all.
 *
 * To improve the performance of infrequent queries, code was added to the
 * library to allow a client (i.e. a thread in the picl plugin) to proactively
 * refresh cache entries without waiting for them to expire, thereby ensuring
 * that all volatile entries in the cache at any given time are less than 300
 * seconds old.  Whenever an SNMP query is generated to retrieve volatile data
 * that will be cached, an entry is added in a refresh queue that tracks the
 * parameters of the query and the time that it was made.  A client can query
 * the age of the oldest item in the refresh queue and - at its discretion - can
 * then force that query to be repeated in a manner that will update the
 * mibcache entry even though it hasn't expired.
 */
/*
 * One entry in the cache refresh queue.  Per the description above, an
 * entry records the parameters of an SNMP query and when it was made.
 * NOTE(review): the field meanings below are inferred from the arguments
 * fetch_bulk() passes to refreshq_add_job(); confirm against that function.
 */
typedef struct {
	struct picl_snmphdl	*smd;		/* presumably the query's handle */
	char			*oidstrs;	/* presumably the group's OIDs */
	int			n_oids;		/* presumably # of OIDs above */
	int			row;		/* presumably the row queried */
	int			last_fetch_time;	/* in scaled hrtime */
} refreshq_job_t;

static mutex_t		refreshq_lock;
static refreshq_job_t	*refreshq = NULL;
static uint_t		n_refreshq_slots = 0;	/* # of alloc'ed job slots */
static uint_t		n_refreshq_jobs = 0;	/* # of unprocessed jobs */
static uint_t		refreshq_next_job = 0;	/* oldest unprocessed job */
static uint_t		refreshq_next_slot = 0;	/* next available job slot */


/*
 * Static function declarations
 */
static void	libpiclsnmp_init(void);

static int	lookup_int(char *, int, int *, int);
static int	lookup_str(char *, int, char **, int);
static int	lookup_bitstr(char *, int, uchar_t **, uint_t *, int);

static oidgroup_t *locate_oid_group(struct picl_snmphdl *, char *);
static int	search_oid_in_group(char *, char *, int);

static snmp_pdu_t *fetch_single(struct picl_snmphdl *, char *, int, int *);
static snmp_pdu_t *fetch_next(struct picl_snmphdl *, char *, int, int *);
static void	fetch_bulk(struct picl_snmphdl *, char *, int, int, int, int *);
static int	fetch_single_str(struct picl_snmphdl *, char *, int,
		    char **, int *);
static int	fetch_single_int(struct picl_snmphdl *, char *, int,
		    int *, int *);
static int	fetch_single_bitstr(struct picl_snmphdl *, char *, int,
		    uchar_t **, uint_t *, int *);

static int	snmp_send_request(struct picl_snmphdl *, snmp_pdu_t *, int *);
static int	snmp_recv_reply(struct picl_snmphdl *, snmp_pdu_t *, int *);

static int	mibcache_realloc(int);
static void	mibcache_populate(snmp_pdu_t *, int);
static char	*oid_to_oidstr(oid *, size_t);

static int	refreshq_realloc(int);
static int	refreshq_add_job(struct picl_snmphdl *, char *, int, int);


/*
 * Library constructor (registered through #pragma init above).  Sets up
 * the three library-wide mutexes, allocates the initial block of mibcache
 * rows and initializes logging.  If that first mibcache allocation fails,
 * mibcache_lock is destroyed again.
 */
static void
libpiclsnmp_init(void)
{
	(void) mutex_init(&snmp_reqid_lock, USYNC_THREAD, NULL);
	(void) mutex_init(&refreshq_lock, USYNC_THREAD, NULL);
	(void) mutex_init(&mibcache_lock, USYNC_THREAD, NULL);

	if (mibcache_realloc(0) < 0) {
		(void) mutex_destroy(&mibcache_lock);
	}

	LOGINIT();
}

/*
 * Allocate and initialize an SNMP handle.
 *
 * In the default build this opens the ds_snmp driver; when USE_SOCKETS is
 * defined it instead creates a UDP socket aimed at the agent address named
 * by the SNMP_AGENT_IPADDR environment variable.
 *
 * Returns the new handle, or NULL on any failure.  Nothing is leaked on
 * the failure paths.  A successful handle is released with snmp_fini().
 */
picl_snmphdl_t
snmp_init()
{
	struct picl_snmphdl	*smd;
#ifdef USE_SOCKETS
	int	sbuf = (1 << 15);	/* 32K */
	int	rbuf = (1 << 17);	/* 128K */
	char	*snmp_agent_addr;
#endif

	smd = (struct picl_snmphdl *)calloc(1, sizeof (struct picl_snmphdl));
	if (smd == NULL)
		return (NULL);

#ifdef USE_SOCKETS
	if ((snmp_agent_addr = getenv("SNMP_AGENT_IPADDR")) == NULL) {
		free(smd);
		return (NULL);
	}

	if ((smd->fd = socket(PF_INET, SOCK_DGRAM, 0)) < 0) {
		free(smd);
		return (NULL);
	}

	/* Buffer sizing is best effort; failures here are not fatal. */
	(void) setsockopt(smd->fd, SOL_SOCKET, SO_SNDBUF, &sbuf, sizeof (int));
	(void) setsockopt(smd->fd, SOL_SOCKET, SO_RCVBUF, &rbuf, sizeof (int));

	(void) memset(&smd->agent_addr, 0, sizeof (struct sockaddr_in));
	smd->agent_addr.sin_family = AF_INET;
	smd->agent_addr.sin_port = htons(SNMP_DEFAULT_PORT);
	smd->agent_addr.sin_addr.s_addr = inet_addr(snmp_agent_addr);
#else
	smd->fd = open(DS_SNMP_DRIVER, O_RDWR);
	if (smd->fd < 0) {
		free(smd);
		return (NULL);
	}
#endif

	return ((picl_snmphdl_t)smd);
}

/*
 * Release a handle obtained from snmp_init(): close its descriptor (when
 * one is open) and free the handle structure.  A NULL handle is ignored.
 */
void
snmp_fini(picl_snmphdl_t hdl)
{
	struct picl_snmphdl	*handle = (struct picl_snmphdl *)hdl;

	if (handle == NULL)
		return;

	if (handle->fd >= 0)
		(void) close(handle->fd);

	free(handle);
}

/*
 * Throw away the entire mibcache (every row's nvlist and the row array
 * itself) and, if clr_linkreset is non-zero, issue DSSNMP_CLRLNKRESET on
 * the handle's descriptor.
 *
 * Returns 0 when no ioctl was requested, -1 when one was requested but the
 * handle is NULL or has no open descriptor, and otherwise the ioctl()
 * return value.  The cache is flushed in every case.
 */
int
snmp_reinit(picl_snmphdl_t hdl, int clr_linkreset)
{
	struct picl_snmphdl *handle = (struct picl_snmphdl *)hdl;
	uint_t idx;

	(void) mutex_lock(&mibcache_lock);

	for (idx = 0; idx < n_mibcache_rows; idx++) {
		if (mibcache[idx] != NULL)
			nvlist_free(mibcache[idx]);
	}
	n_mibcache_rows = 0;

	/* free(NULL) is a no-op, so no guard is needed here */
	free(mibcache);
	mibcache = NULL;

	(void) mutex_unlock(&mibcache_lock);

	if (!clr_linkreset)
		return (0);

	if (handle == NULL || handle->fd < 0)
		return (-1);

	return (ioctl(handle->fd, DSSNMP_CLRLNKRESET, NULL));
}

/*
 * Register a group of OIDs with the handle.  Objects belonging to a
 * registered group are fetched together with GETBULK and cached; is_vol
 * marks the group's values as volatile.
 *
 * oidstrs holds n_oids NUL-terminated OID strings laid out back to back.
 * The strings are copied, so the caller keeps ownership of oidstrs.
 *
 * The function cannot report errors: invalid arguments (NULL handle, NULL
 * oidstrs, n_oids <= 0) and allocation failures leave the handle unchanged.
 */
void
snmp_register_group(picl_snmphdl_t hdl, char *oidstrs, int n_oids, int is_vol)
{
	struct picl_snmphdl *smd = (struct picl_snmphdl *)hdl;
	oidgroup_t	*oidg;
	oidgroup_t	**tailp;
	char		*p;
	size_t		sz;
	int		i;

	/* Validate before allocating so that nothing can leak or crash. */
	if (smd == NULL || oidstrs == NULL || n_oids <= 0)
		return;

	/*
	 * Allocate a new oidgroup_t
	 */
	oidg = (oidgroup_t *)calloc(1, sizeof (*oidg));
	if (oidg == NULL)
		return;

	/*
	 * Determine how much space is required to register this group:
	 * the sum of every string's length, terminators included.
	 */
	sz = 0;
	for (i = 0; i < n_oids; i++)
		sz += strlen(oidstrs + sz) + 1;

	/*
	 * Create this oid group
	 */
	if ((p = (char *)malloc(sz)) == NULL) {
		free(oidg);
		return;
	}

	(void) memcpy(p, oidstrs, sz);

	oidg->next = NULL;
	oidg->oidstrs = p;
	oidg->n_oids = n_oids;
	oidg->is_volatile = is_vol;

	/*
	 * Link it to the tail of the list of oid groups
	 */
	for (tailp = &smd->group; *tailp != NULL; tailp = &(*tailp)->next)
		;
	*tailp = oidg;
}

/*
 * snmp_get_int() retrieves the integer value of the object identified by
 * the OID prefix and row, storing it in *val.  Objects that belong to no
 * registered group are fetched straight from the agent; all others are
 * served from the mibcache, which is refilled with a bulk fetch on a miss.
 * Whenever the agent is contacted and snmp_syserr is non-NULL, the
 * resulting system error (0 if none) is stored there.
 *
 * Returns 0 on success and -1 on failure.
 */
int
snmp_get_int(picl_snmphdl_t hdl, char *prefix, int row, int *val,
    int *snmp_syserr)
{
	struct picl_snmphdl *handle = (struct picl_snmphdl *)hdl;
	oidgroup_t	*group;
	int	syserr = 0;
	int	status;

	if (handle == NULL || prefix == NULL || val == NULL)
		return (-1);

	group = locate_oid_group(handle, prefix);
	if (group == NULL) {
		/* Not cacheable: ask the agent for just this object. */
		status = fetch_single_int(handle, prefix, row, val, &syserr);
		if (snmp_syserr != NULL)
			*snmp_syserr = syserr;
		return (status);
	}

	/* Cache hit? */
	if (lookup_int(prefix, row, val, group->is_volatile) == 0)
		return (0);

	/* Cache miss: refill the cache for the whole group. */
	fetch_bulk(handle, group->oidstrs, group->n_oids, row,
	    group->is_volatile, &syserr);
	if (snmp_syserr != NULL)
		*snmp_syserr = syserr;

	/* The value is available only if the refill put it there. */
	status = lookup_int(prefix, row, val, group->is_volatile);

	return ((status < 0) ? -1 : 0);
}

/*
 * snmp_get_str() retrieves the string value of the object identified by
 * the OID prefix and row.  On success *strp points to a freshly allocated
 * copy that the caller must free(); *strp is set to NULL on entry once
 * the arguments have been validated.  Whenever the agent is contacted and
 * snmp_syserr is non-NULL, the resulting system error is stored there.
 *
 * Returns 0 on success and -1 on failure.
 */
int
snmp_get_str(picl_snmphdl_t hdl, char *prefix, int row, char **strp,
    int *snmp_syserr)
{
	struct picl_snmphdl *handle = (struct picl_snmphdl *)hdl;
	oidgroup_t	*group;
	char	*cached;
	int	syserr = 0;
	int	status;

	if (handle == NULL || prefix == NULL || strp == NULL)
		return (-1);

	*strp = NULL;

	group = locate_oid_group(handle, prefix);
	if (group == NULL) {
		/* Not cacheable: ask the agent for just this object. */
		status = fetch_single_str(handle, prefix, row, strp, &syserr);
		if (snmp_syserr != NULL)
			*snmp_syserr = syserr;
		return (status);
	}

	if (lookup_str(prefix, row, &cached, group->is_volatile) != 0) {
		/* Cache miss: refill the cache and look once more. */
		fetch_bulk(handle, group->oidstrs, group->n_oids, row,
		    group->is_volatile, &syserr);
		if (snmp_syserr != NULL)
			*snmp_syserr = syserr;

		if (lookup_str(prefix, row, &cached, group->is_volatile) < 0)
			return (-1);
	}

	/* Hand the caller a private copy of the cached string. */
	*strp = strdup(cached);

	return ((*strp == NULL) ? -1 : 0);
}

/*
 * snmp_get_bitstr() retrieves the bit string value of the object
 * identified by the OID prefix and row.  On success *bitstrp points to a
 * freshly allocated copy of *nbytes bytes that the caller must free();
 * *bitstrp is set to NULL on entry once the arguments have been validated.
 * Whenever the agent is contacted and snmp_syserr is non-NULL, the
 * resulting system error is stored there.
 *
 * Returns 0 on success and -1 on failure.
 */
int
snmp_get_bitstr(picl_snmphdl_t hdl, char *prefix, int row, uchar_t **bitstrp,
    uint_t *nbytes, int *snmp_syserr)
{
	struct picl_snmphdl *handle = (struct picl_snmphdl *)hdl;
	oidgroup_t	*group;
	uchar_t	*cached;
	int	syserr = 0;
	int	status;

	if (handle == NULL || prefix == NULL || bitstrp == NULL ||
	    nbytes == NULL)
		return (-1);

	*bitstrp = NULL;

	group = locate_oid_group(handle, prefix);
	if (group == NULL) {
		/* Not cacheable: ask the agent for just this object. */
		status = fetch_single_bitstr(handle, prefix, row, bitstrp,
		    nbytes, &syserr);
		if (snmp_syserr != NULL)
			*snmp_syserr = syserr;
		return (status);
	}

	if (lookup_bitstr(prefix, row, &cached, nbytes,
	    group->is_volatile) != 0) {
		/* Cache miss: refill the cache and look once more. */
		fetch_bulk(handle, group->oidstrs, group->n_oids, row,
		    group->is_volatile, &syserr);
		if (snmp_syserr != NULL)
			*snmp_syserr = syserr;

		if (lookup_bitstr(prefix, row, &cached, nbytes,
		    group->is_volatile) < 0)
			return (-1);
	}

	/* Hand the caller a private copy of the cached bytes. */
	*bitstrp = (uchar_t *)calloc(*nbytes, 1);
	if (*bitstrp == NULL)
		return (-1);

	(void) memcpy(*bitstrp, (const void *)cached, *nbytes);

	return (0);
}

/*
 * snmp_get_nextrow() is loosely modelled on SNMP GETNEXT: it reports, in
 * *nextrow, the next valid row number after 'row' for the object named by
 * prefix.  Only the row number is returned, never the value; this is how
 * the rows of a table are enumerated.  Results are never cached.
 *
 * Returns 0 on success and -1 on failure.  On failure, if snmp_syserr is
 * non-NULL, it receives:
 *	EINVAL	bad arguments
 *	ENOSPC	the reply carried no usable variable (end of MIB view)
 *	ENOMEM	the returned OID could not be converted to a string
 *	ENOENT	the next object belongs to a different table
 * or the error produced while talking to the agent.
 */
int
snmp_get_nextrow(picl_snmphdl_t hdl, char *prefix, int row, int *nextrow,
    int *snmp_syserr)
{
	struct picl_snmphdl *handle = (struct picl_snmphdl *)hdl;
	snmp_pdu_t	*reply;
	pdu_varlist_t	*var;
	char	*next_prefix = NULL;
	int	syserr = 0;
	int	status = -1;

	if (handle == NULL || prefix == NULL || nextrow == NULL) {
		if (snmp_syserr != NULL)
			*snmp_syserr = EINVAL;
		return (-1);
	}

	reply = fetch_next(handle, prefix, row, &syserr);
	if (reply == NULL) {
		if (snmp_syserr != NULL)
			*snmp_syserr = syserr;
		return (-1);
	}

	/*
	 * Only the name (and hence the row number) of the lexicographically
	 * next object matters here, not its value.
	 */
	var = reply->vars;

	/* No usable variable means we ran off the end of the MIB view. */
	if (var == NULL || var->name == NULL ||
	    var->type == SNMP_NOSUCHOBJECT ||
	    var->type == SNMP_NOSUCHINSTANCE ||
	    var->type == SNMP_ENDOFMIBVIEW) {
		syserr = ENOSPC;
		goto out;
	}

	/* Strip the trailing row sub-id and render the rest as a string. */
	next_prefix = oid_to_oidstr(var->name, var->name_len - 1);
	if (next_prefix == NULL) {
		syserr = ENOMEM;
		goto out;
	}

	/* A different prefix means we have moved on to the next table. */
	if (strcmp(next_prefix, prefix) != 0) {
		syserr = ENOENT;
		goto out;
	}

	/* Same object, next valid row: its number is the last sub-id. */
	*nextrow = (var->name)[var->name_len - 1];
	status = 0;

out:
	free(next_prefix);
	snmp_free_pdu(reply);

	if (status != 0 && snmp_syserr != NULL)
		*snmp_syserr = syserr;

	return (status);
}

/*
 * Hand out the next request id for an SNMP message to the agent.  Ids are
 * issued in sequence under snmp_reqid_lock.
 */
int
snmp_get_reqid(void)
{
	int	reqid;

	(void) mutex_lock(&snmp_reqid_lock);
	reqid = snmp_reqid;
	snmp_reqid = reqid + 1;
	(void) mutex_unlock(&snmp_reqid_lock);

	return (reqid);
}

/*
 * Look up the integer cached under 'prefix' for 'row' and store it in
 * *valp.  Volatile values are cached as a { value, timestamp } pair and
 * only count as a hit while no older than MAX_INCACHE_TIME (in scaled
 * hrtime units).
 *
 * Returns 0 on a hit and -1 on a miss (row not cached, name not present,
 * malformed entry or expired entry).
 */
static int
lookup_int(char *prefix, int row, int *valp, int is_vol)
{
	int32_t	*pair;
	uint_t	npair;
	int	now;
	int	age;
	int	status = -1;

	(void) mutex_lock(&mibcache_lock);

	if (row >= n_mibcache_rows || mibcache[row] == NULL)
		goto done;

	if (!is_vol) {
		/* Non-volatile values are stored as a plain int32. */
		if (nvlist_lookup_int32(mibcache[row], prefix, valp) == 0)
			status = 0;
		goto done;
	}

	/* Volatile values are stored as an integer-timestamp pair. */
	if (nvlist_lookup_int32_array(mibcache[row], prefix,
	    &pair, &npair) != 0)
		goto done;

	if (npair != 2 || pair[1] < 0)
		goto done;

	now = GET_SCALED_HRTIME();
	age = now - pair[1];
	if (age < 0 || age > MAX_INCACHE_TIME)
		goto done;

	*valp = (int)pair[0];
	status = 0;

done:
	(void) mutex_unlock(&mibcache_lock);

	return (status);
}

/*
 * Look up the string cached under 'prefix' for 'row'.  On a hit *valp is
 * set to the string found by the nvlist lookup; callers such as
 * snmp_get_str() duplicate it before handing it out.  Volatile values are
 * cached as a { value, timestamp-as-decimal-string } pair and only count
 * as a hit while no older than MAX_INCACHE_TIME (in scaled hrtime units).
 *
 * Returns 0 on a hit and -1 on a miss (row not cached, name not present,
 * malformed entry or expired entry).
 */
static int
lookup_str(char *prefix, int row, char **valp, int is_vol)
{
	char	**pair;
	uint_t	npair;
	int	stamp;
	int	now;
	int	age;
	int	status = -1;

	(void) mutex_lock(&mibcache_lock);

	if (row >= n_mibcache_rows || mibcache[row] == NULL)
		goto done;

	if (!is_vol) {
		/* Non-volatile values are stored as a plain string. */
		if (nvlist_lookup_string(mibcache[row], prefix, valp) == 0)
			status = 0;
		goto done;
	}

	/* Volatile values are stored as a string-timestamp pair. */
	if (nvlist_lookup_string_array(mibcache[row], prefix,
	    &pair, &npair) != 0)
		goto done;

	/* Check the element count before touching the timestamp slot. */
	if (npair != 2)
		goto done;

	stamp = atoi(pair[1]);
	if (stamp <= 0)
		goto done;

	now = GET_SCALED_HRTIME();
	age = now - stamp;
	if (age < 0 || age > MAX_INCACHE_TIME)
		goto done;

	*valp = pair[0];
	status = 0;

done:
	(void) mutex_unlock(&mibcache_lock);

	return (status);
}

/*
 * Look up the byte array cached under 'prefix' for 'row', returning the
 * bytes through *valp and their count through *nelem.
 *
 * Volatile bit strings are not supported yet: the nvlist interfaces have
 * no array-of-byte-arrays counterpart to string arrays, so a timestamp
 * would have to be folded into the byte array itself, and so far nothing
 * has needed that.  A volatile lookup therefore always misses.
 *
 * Returns 0 on a hit and -1 on a miss.
 */
static int
lookup_bitstr(char *prefix, int row, uchar_t **valp, uint_t *nelem, int is_vol)
{
	int	status = -1;

	(void) mutex_lock(&mibcache_lock);

	if (row >= n_mibcache_rows || mibcache[row] == NULL || is_vol)
		goto done;

	if (nvlist_lookup_byte_array(mibcache[row], prefix, valp, nelem) == 0)
		status = 0;

done:
	(void) mutex_unlock(&mibcache_lock);

	return (status);
}

/*
 * Check whether 'prefix' is one of the n_oids NUL-terminated strings that
 * are packed back to back in 'oidstrs'.  Returns 0 when an exact match is
 * found and -1 otherwise.
 */
static int
search_oid_in_group(char *prefix, char *oidstrs, int n_oids)
{
	char	*cursor = oidstrs;
	int	remaining;

	for (remaining = n_oids; remaining > 0; remaining--) {
		if (strcmp(cursor, prefix) == 0)
			return (0);

		/* step over this string and its terminator */
		cursor += strlen(cursor) + 1;
	}

	return (-1);
}

/*
 * Find the first registered oid group on the handle that contains
 * 'prefix'.  Returns the group, or NULL if the handle is NULL or no
 * registered group lists that prefix.
 */
static oidgroup_t *
locate_oid_group(struct picl_snmphdl *smd, char *prefix)
{
	oidgroup_t	*cur;

	if (smd == NULL)
		return (NULL);

	cur = smd->group;
	while (cur != NULL) {
		if (search_oid_in_group(prefix, cur->oidstrs,
		    cur->n_oids) == 0)
			break;
		cur = cur->next;
	}

	return (cur);
}

/*
 * Fetch a single integer object directly from the agent (bypassing the
 * cache) and store it in *ival.  Returns 0 on success and -1 if the fetch
 * fails or the reply carries no integer value.
 */
static int
fetch_single_int(struct picl_snmphdl *smd, char *prefix, int row, int *ival,
    int *snmp_syserr)
{
	snmp_pdu_t	*reply;
	pdu_varlist_t	*var;
	int	status = -1;

	reply = fetch_single(smd, prefix, row, snmp_syserr);
	if (reply == NULL)
		return (-1);

	/*
	 * Signed and unsigned integer values are deliberately not told
	 * apart here: only snmp_get_int() exists at the higher level and
	 * the received value is never interpreted.  This relies on int and
	 * uint having the same size and on all pointers having the same
	 * size (so val.iptr is as good as val.uiptr in pdu_varlist_t).  If
	 * either assumption is ever violated, it will be time to add
	 * snmp_get_uint().
	 */
	var = reply->vars;
	if (var != NULL && var->val.iptr != NULL) {
		*ival = *(var->val.iptr);
		status = 0;
	}

	snmp_free_pdu(reply);

	return (status);
}

/*
 * Fetch a single string object directly from the agent (bypassing the
 * cache).  On success *valp points to a newly allocated copy of the
 * string, which the caller must free().
 *
 * Returns 0 on success and -1 if the fetch fails, the reply carries no
 * string value, or the copy cannot be allocated.  In the last case
 * *snmp_syserr (when non-NULL) is set to ENOMEM.
 */
static int
fetch_single_str(struct picl_snmphdl *smd, char *prefix, int row, char **valp,
    int *snmp_syserr)
{
	snmp_pdu_t *reply_pdu;
	pdu_varlist_t *vp;
	char *copy;

	if ((reply_pdu = fetch_single(smd, prefix, row, snmp_syserr)) == NULL)
		return (-1);

	vp = reply_pdu->vars;
	if (vp == NULL || vp->val.str == NULL) {
		snmp_free_pdu(reply_pdu);
		return (-1);
	}

	copy = strdup((const char *)(vp->val.str));

	snmp_free_pdu(reply_pdu);

	/* Never report success while handing back a NULL string. */
	if (copy == NULL) {
		if (snmp_syserr)
			*snmp_syserr = ENOMEM;
		return (-1);
	}

	*valp = copy;

	return (0);
}

/*
 * Fetch a single bit string object directly from the agent (bypassing the
 * cache).  On success *valp points to a newly allocated copy of the bytes
 * and *nelem holds their count; the caller must free() the copy.
 *
 * Returns 0 on success and -1 if the fetch fails, the reply carries no
 * value, or the copy cannot be allocated.
 */
static int
fetch_single_bitstr(struct picl_snmphdl *smd, char *prefix, int row,
    uchar_t **valp, uint_t *nelem, int *snmp_syserr)
{
	snmp_pdu_t	*reply;
	pdu_varlist_t	*var;
	int	status = -1;

	reply = fetch_single(smd, prefix, row, snmp_syserr);
	if (reply == NULL)
		return (-1);

	var = reply->vars;
	if (var != NULL && var->val.str != NULL) {
		*valp = (uchar_t *)calloc(var->val_len, 1);
		if (*valp != NULL) {
			*nelem = var->val_len;
			(void) memcpy(*valp, (const void *)(var->val.str),
			    (size_t)(var->val_len));
			status = 0;
		}
	}

	snmp_free_pdu(reply);

	return (status);
}

/*
 * Issue an SNMP GET for the object named by prefix and row and return the
 * parsed reply PDU.  The request PDU is always released before returning.
 *
 * Returns the reply PDU (to be released with snmp_free_pdu()), or NULL if
 * the request could not be built, sent, answered or parsed.  Send and
 * receive failures are reported through snmp_syserr by the I/O helpers.
 */
static snmp_pdu_t *
fetch_single(struct picl_snmphdl *smd, char *prefix, int row, int *snmp_syserr)
{
	snmp_pdu_t	*pdu;
	snmp_pdu_t	*reply_pdu = NULL;

	LOGGET(TAG_CMD_REQUEST, prefix, row);

	pdu = snmp_create_pdu(SNMP_MSG_GET, 0, prefix, 1, row);
	if (pdu == NULL)
		return (NULL);

	LOGPDU(TAG_REQUEST_PDU, pdu);

	if (snmp_make_packet(pdu) < 0)
		goto done;

	LOGPKT(TAG_REQUEST_PKT, pdu->req_pkt, pdu->req_pktsz);

	if (snmp_send_request(smd, pdu, snmp_syserr) < 0)
		goto done;

	if (snmp_recv_reply(smd, pdu, snmp_syserr) < 0)
		goto done;

	LOGPKT(TAG_RESPONSE_PKT, pdu->reply_pkt, pdu->reply_pktsz);

	reply_pdu = snmp_parse_reply(pdu->reqid, pdu->reply_pkt,
	    pdu->reply_pktsz);

	LOGPDU(TAG_RESPONSE_PDU, reply_pdu);

done:
	snmp_free_pdu(pdu);

	return (reply_pdu);
}

/*
 * Issue an SNMP GETBULK for the n_oids objects in oidstrs, starting at
 * 'row', and load the values from an error-free reply into the mibcache.
 * For volatile groups a job is also added to the cache refresh queue.
 *
 * Nothing is returned; callers find out whether the fetch helped by
 * repeating their cache lookup.  Send and receive failures are reported
 * through snmp_syserr by the I/O helpers.
 */
static void
fetch_bulk(struct picl_snmphdl *smd, char *oidstrs, int n_oids,
    int row, int is_vol, int *snmp_syserr)
{
	snmp_pdu_t	*pdu;
	snmp_pdu_t	*reply_pdu;
	int		max_reps = 0;

	LOGBULK(TAG_CMD_REQUEST, n_oids, oidstrs, row);

	/*
	 * Volatile properties are fetched one row at a time.  For anything
	 * else, max_reps stays 0, which makes snmp_create_pdu() ask for
	 * SNMP_DEF_MAX_REPETITIONS rows.
	 */
	if (is_vol)
		max_reps = 1;

	pdu = snmp_create_pdu(SNMP_MSG_GETBULK, max_reps, oidstrs, n_oids, row);
	if (pdu == NULL)
		return;

	LOGPDU(TAG_REQUEST_PDU, pdu);

	/* Encode the PDU into an ASN.1 request packet. */
	if (snmp_make_packet(pdu) < 0)
		goto done;

	LOGPKT(TAG_REQUEST_PKT, pdu->req_pkt, pdu->req_pktsz);

	/* Ship the request to the agent ... */
	if (snmp_send_request(smd, pdu, snmp_syserr) < 0)
		goto done;

	/* ... and collect its answer in the request PDU's reply buffer. */
	if (snmp_recv_reply(smd, pdu, snmp_syserr) < 0)
		goto done;

	LOGPKT(TAG_RESPONSE_PKT, pdu->reply_pkt, pdu->reply_pktsz);

	/* Parse and validate the answer into a reply PDU. */
	reply_pdu = snmp_parse_reply(pdu->reqid, pdu->reply_pkt,
	    pdu->reply_pktsz);
	if (reply_pdu == NULL)
		goto done;

	LOGPDU(TAG_RESPONSE_PDU, reply_pdu);

	if (reply_pdu->errstat == SNMP_ERR_NOERROR) {
		/* Volatile data also gets a cache refresh queue entry. */
		if (is_vol)
			(void) refreshq_add_job(smd, oidstrs, n_oids, row);

		mibcache_populate(reply_pdu, is_vol);
	}

	snmp_free_pdu(reply_pdu);

done:
	snmp_free_pdu(pdu);
}

/*
 * Issue an SNMP GETNEXT for the object named by prefix and row and return
 * the parsed reply PDU.  The request PDU is always released before
 * returning.
 *
 * Returns the reply PDU (to be released with snmp_free_pdu()), or NULL if
 * the request could not be built, sent, answered or parsed.  Send and
 * receive failures are reported through snmp_syserr by the I/O helpers.
 */
static snmp_pdu_t *
fetch_next(struct picl_snmphdl *smd, char *prefix, int row, int *snmp_syserr)
{
	snmp_pdu_t	*pdu;
	snmp_pdu_t	*reply_pdu = NULL;

	LOGNEXT(TAG_CMD_REQUEST, prefix, row);

	pdu = snmp_create_pdu(SNMP_MSG_GETNEXT, 0, prefix, 1, row);
	if (pdu == NULL)
		return (NULL);

	LOGPDU(TAG_REQUEST_PDU, pdu);

	if (snmp_make_packet(pdu) < 0)
		goto done;

	LOGPKT(TAG_REQUEST_PKT, pdu->req_pkt, pdu->req_pktsz);

	if (snmp_send_request(smd, pdu, snmp_syserr) < 0)
		goto done;

	if (snmp_recv_reply(smd, pdu, snmp_syserr) < 0)
		goto done;

	LOGPKT(TAG_RESPONSE_PKT, pdu->reply_pkt, pdu->reply_pktsz);

	reply_pdu = snmp_parse_reply(pdu->reqid, pdu->reply_pkt,
	    pdu->reply_pktsz);

	LOGPDU(TAG_RESPONSE_PDU, reply_pdu);

done:
	snmp_free_pdu(pdu);

	return (reply_pdu);
}

/*
 * Send the encoded request packet held in 'pdu' to the agent: with
 * sendto() in the USE_SOCKETS build (retrying when interrupted), and with
 * write() on the handle's descriptor otherwise.
 *
 * Returns 0 on success and -1 on failure.  When the send itself fails and
 * snmp_syserr is non-NULL, errno is stored there; argument errors (NULL
 * handle, no open descriptor, no request packet) leave it untouched.
 */
static int
snmp_send_request(struct picl_snmphdl *smd, snmp_pdu_t *pdu, int *snmp_syserr)
{
#ifdef USE_SOCKETS
	ssize_t		ret;
#endif

	if (smd == NULL || smd->fd < 0)
		return (-1);

	if (pdu == NULL || pdu->req_pkt == NULL)
		return (-1);

#ifdef USE_SOCKETS
	do {
		LOGIO(TAG_SENDTO, smd->fd, pdu->req_pkt, pdu->req_pktsz);

		ret = sendto(smd->fd, pdu->req_pkt, pdu->req_pktsz, 0,
		    (struct sockaddr *)&smd->agent_addr,
		    sizeof (struct sockaddr));
		if (ret < 0 && errno != EINTR) {
			/* report the failure just like the driver path */
			if (snmp_syserr)
				*snmp_syserr = errno;
			return (-1);
		}
	} while (ret < 0);
#else
	LOGIO(TAG_WRITE, smd->fd, pdu->req_pkt, pdu->req_pktsz);

	if (write(smd->fd, pdu->req_pkt, pdu->req_pktsz) < 0) {
		if (snmp_syserr)
			*snmp_syserr = errno;
		return (-1);
	}
#endif

#ifdef SNMP_DEBUG
	snmp_nsends++;
	snmp_sentbytes += pdu->req_pktsz;
#endif

	return (0);
}

/*
 * Receive the agent's reply into a newly allocated buffer and attach it
 * to 'pdu' as reply_pkt / reply_pktsz.  The buffer is not freed here once
 * attached.
 *
 * In the USE_SOCKETS build the reply is read with recvfrom(); a datagram
 * that fills the whole receive buffer is rejected.  Otherwise the
 * DSSNMP_GETINFO ioctl (which blocks until data is available) supplies
 * the packet size and the packet is then read from the descriptor.
 *
 * Returns 0 on success and -1 on failure.  On I/O or allocation failure,
 * if snmp_syserr is non-NULL, it receives the error (errno, ENOMEM, or
 * EMSGSIZE for an over-long datagram); argument errors leave it untouched.
 */
static int
snmp_recv_reply(struct picl_snmphdl *smd, snmp_pdu_t *pdu, int *snmp_syserr)
{
	size_t	pktsz;
	uchar_t	*pkt;
	int	saved_errno;
#ifdef USE_SOCKETS
	struct sockaddr_in 	from;
	int	fromlen;
	ssize_t	msgsz;
#else
	struct dssnmp_info	snmp_info;
#endif

	if (smd == NULL || smd->fd < 0 || pdu == NULL)
		return (-1);

#ifdef USE_SOCKETS
	if ((pkt = (uchar_t *)calloc(1, SNMP_MAX_RECV_PKTSZ)) == NULL) {
		if (snmp_syserr)
			*snmp_syserr = ENOMEM;
		return (-1);
	}

	fromlen = sizeof (struct sockaddr_in);

	LOGIO(TAG_RECVFROM, smd->fd, pkt, SNMP_MAX_RECV_PKTSZ);

	msgsz = recvfrom(smd->fd, pkt, SNMP_MAX_RECV_PKTSZ, 0,
	    (struct sockaddr *)&from, &fromlen);
	if (msgsz  < 0 || msgsz >= SNMP_MAX_RECV_PKTSZ) {
		/* capture errno before free() gets a chance to change it */
		saved_errno = (msgsz < 0) ? errno : EMSGSIZE;
		free(pkt);
		if (snmp_syserr)
			*snmp_syserr = saved_errno;
		return (-1);
	}

	pktsz = (size_t)msgsz;
#else
	LOGIO(TAG_IOCTL, smd->fd, DSSNMP_GETINFO, &snmp_info);

	/*
	 * The ioctl will block until we have snmp data available
	 */
	if (ioctl(smd->fd, DSSNMP_GETINFO, &snmp_info) < 0) {
		if (snmp_syserr)
			*snmp_syserr = errno;
		return (-1);
	}

	pktsz = snmp_info.size;
	if ((pkt = (uchar_t *)calloc(1, pktsz)) == NULL) {
		if (snmp_syserr)
			*snmp_syserr = ENOMEM;
		return (-1);
	}

	LOGIO(TAG_READ, smd->fd, pkt, pktsz);

	if (read(smd->fd, pkt, pktsz) < 0) {
		/* capture errno before free() gets a chance to change it */
		saved_errno = errno;
		free(pkt);
		if (snmp_syserr)
			*snmp_syserr = saved_errno;
		return (-1);
	}
#endif

	pdu->reply_pkt = pkt;
	pdu->reply_pktsz = pktsz;

#ifdef SNMP_DEBUG
	snmp_nrecvs++;
	snmp_rcvdbytes += pktsz;
#endif

	return (0);
}

static int
mibcache_realloc(int hint)
{
	uint_t		count = (uint_t)hint;
	nvlist_t	**p;

	if (hint < 0)
		return (-1);

	(void) mutex_lock(&mibcache_lock);

	if (hint < n_mibcache_rows) {
		(void) mutex_unlock(&mibcache_lock);
		return (0);
	}

	count =  ((count >> MIBCACHE_BLK_SHIFT) + 1) << MIBCACHE_BLK_SHIFT;

	p = (nvlist_t **)calloc(count, sizeof (nvlist_t *));
	if (p == NULL) {
		(void) mutex_unlock(&mibcache_lock);
		return (-1);
	}

	if (mibcache) {
		(void) memcpy((void *) p, (void *) mibcache,
		    n_mibcache_rows * sizeof (nvlist_t *));
		free((void *) mibcache);
	}

	mibcache = p;
	n_mibcache_rows = count;

	(void) mutex_unlock(&mibcache_lock);

	return (0);
}


/*
 * Scan each variable in the returned PDU's bindings and populate
 * the cache appropriately.
 *
 * Only integer, octet string and bit string variables are cached.  Each
 * value is stored in the nvlist of its row (the last sub-id of the
 * variable's OID) under a key made of the remaining sub-ids.  Variables
 * that cannot be stored (no name, no value, allocation failure) are
 * silently skipped.
 *
 * The row check, the nvlist allocation and the insertion all happen under
 * a single hold of mibcache_lock, so a concurrent snmp_reinit() cannot
 * free the table between the check and the insert.
 */
static void
mibcache_populate(snmp_pdu_t *pdu, int is_vol)
{
	pdu_varlist_t	*vp;
	int		row;
	int		grow_failed;
	char		*oidstr;
	int		tod;	/* scaled hrtime, not seconds */
	char		tod_str[MAX_INT_LEN];
	int		ival_arr[2];
	char		*sval_arr[2];

	if (pdu == NULL)
		return;

	/*
	 * If we're populating volatile properties, we also store a
	 * timestamp with each property value. When we lookup, we check the
	 * current time against this timestamp to determine if we need to
	 * refetch the value or not (refetch if it has been in for far too
	 * long).
	 */
	if (is_vol) {
		tod = GET_SCALED_HRTIME();

		tod_str[0] = 0;
		(void) snprintf(tod_str, MAX_INT_LEN, "%d", tod);

		ival_arr[1] = tod;
		sval_arr[1] = (char *)tod_str;
	}

	for (vp = pdu->vars; vp; vp = vp->nextvar) {
		if (vp->type != ASN_INTEGER && vp->type != ASN_OCTET_STR &&
		    vp->type != ASN_BIT_STR) {
			continue;
		}

		/* An empty name has no row sub-id to index with. */
		if (vp->name == NULL || vp->name_len < 1 ||
		    vp->val.str == NULL)
			continue;

		row = (vp->name)[vp->name_len - 1];
		if (row < 0)
			continue;

		/*
		 * Convert the standard OID form into an oid string that
		 * we can use as the key to lookup. Since we only search
		 * by the prefix (mibcache is really an array of nvlist_t
		 * pointers), ignore the leaf subid.  This is done before
		 * taking the lock to keep the critical section short.
		 */
		oidstr = oid_to_oidstr(vp->name, vp->name_len - 1);
		if (oidstr == NULL)
			continue;

		(void) mutex_lock(&mibcache_lock);

		/*
		 * mibcache_realloc() takes mibcache_lock itself, so the lock
		 * is dropped around the call.  The row count is re-checked
		 * once the lock is held again, since the cache may have been
		 * flushed in the meantime.
		 */
		grow_failed = 0;
		while ((uint_t)row >= n_mibcache_rows) {
			(void) mutex_unlock(&mibcache_lock);
			if (mibcache_realloc(row) < 0) {
				grow_failed = 1;
				break;
			}
			(void) mutex_lock(&mibcache_lock);
		}
		if (grow_failed) {
			/* the lock is not held on this path */
			free(oidstr);
			continue;
		}

		if (mibcache[row] == NULL &&
		    nvlist_alloc(&mibcache[row], NV_UNIQUE_NAME, 0) != 0) {
			(void) mutex_unlock(&mibcache_lock);
			free(oidstr);
			continue;
		}

		if (vp->type == ASN_INTEGER) {
			if (is_vol) {
				ival_arr[0] = *(vp->val.iptr);
				(void) nvlist_add_int32_array(mibcache[row],
				    oidstr, ival_arr, 2);
			} else {
				(void) nvlist_add_int32(mibcache[row],
				    oidstr, *(vp->val.iptr));
			}

		} else if (vp->type == ASN_OCTET_STR) {
			if (is_vol) {
				sval_arr[0] = (char *)vp->val.str;
				(void) nvlist_add_string_array(mibcache[row],
				    oidstr, sval_arr, 2);
			} else {
				(void) nvlist_add_string(mibcache[row],
				    oidstr, (const char *)(vp->val.str));
			}
		} else if (vp->type == ASN_BIT_STR) {
			/*
			 * We don't support yet bit string objects that are
			 * volatile values.
			 */
			if (!is_vol) {
				(void) nvlist_add_byte_array(mibcache[row],
				    oidstr, (uchar_t *)(vp->val.str),
				    (uint_t)vp->val_len);
			}
		}

		(void) mutex_unlock(&mibcache_lock);

		free(oidstr);
	}
}

/*
 * Render the first n_subids sub-identifiers of 'objid' as a dotted
 * decimal string (e.g. "1.3.6.1").  With n_subids == 0 the result is an
 * empty string.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if objid is NULL, memory cannot be allocated, or a sub-identifier does
 * not fit in MAX_INT_LEN characters.
 */
static char *
oid_to_oidstr(oid *objid, size_t n_subids)
{
	char	*oidstr;
	char	subid_str[MAX_INT_LEN];
	size_t	i, nslots, oidstr_sz;
	int	isize;

	if (objid == NULL)
		return (NULL);

	/*
	 * ugly, but for now this will have to do: reserve one subid_str
	 * worth of space per sub-id, which leaves room for the separators
	 * and the terminator.  At least one slot is reserved so that the
	 * result is always a valid string.  calloc() is handed the two
	 * factors separately so that it can reject an overflowing product.
	 */
	nslots = (n_subids == 0) ? 1 : n_subids;
	oidstr = calloc(nslots, sizeof (subid_str));
	if (oidstr == NULL)
		return (NULL);
	oidstr_sz = nslots * sizeof (subid_str);

	for (i = 0; i < n_subids; i++) {
		isize = snprintf(subid_str, sizeof (subid_str), "%d",
		    objid[i]);
		if (isize < 0 || (size_t)isize >= sizeof (subid_str)) {
			/* don't leak the partial result */
			free(oidstr);
			return (NULL);
		}

		(void) strlcat(oidstr, subid_str, oidstr_sz);
		if (i + 1 < n_subids)
			(void) strlcat(oidstr, ".", oidstr_sz);
	}

	return (oidstr);
}

/*
 * Expand the refreshq so that it has more than 'hint' job slots.  Caller
 * must already hold refreshq_lock mutex.  The queue grows in blocks of
 * (1 << REFRESHQ_BLK_SHIFT) job slots, rather than expanding by one slot
 * every time more space is needed.
 *
 * Pending jobs are moved to the start of the new array in chronological
 * order, and refreshq_next_job / refreshq_next_slot are updated to match.
 *
 * Returns 0 on success (including when no growth was needed) and -1 if
 * hint is negative or allocation fails; the queue is unchanged on failure.
 */
static int
refreshq_realloc(int hint)
{
	uint_t		count = (uint_t)hint;
	refreshq_job_t	*p;

	if (hint < 0)
		return (-1);

	if (count < n_refreshq_slots)
		return (0);

	/* Round count up to the next multiple of the block size */
	count =  ((count >> REFRESHQ_BLK_SHIFT) + 1) << REFRESHQ_BLK_SHIFT;

	p = (refreshq_job_t *)calloc(count, sizeof (refreshq_job_t));
	if (p == NULL)
		return (-1);

	if (refreshq != NULL && n_refreshq_jobs > 0) {
		if (refreshq_next_slot > refreshq_next_job) {
			/* Simple case, single copy preserves everything */
			(void) memcpy((void *) p,
			    (void *) &(refreshq[refreshq_next_job]),
			    n_refreshq_jobs * sizeof (refreshq_job_t));
		} else {
			/*
			 * Complex case.  The jobs in the refresh queue wrap
			 * around the end of the array in which they are stored.
			 * To preserve chronological order in the new allocated
			 * array, we need to copy the jobs at the end of the old
			 * array to the beginning of the new one and place the
			 * jobs from the beginning of the old array after them.
			 */
			uint_t tail_jobs, head_jobs;

			tail_jobs = n_refreshq_slots - refreshq_next_job;
			head_jobs = n_refreshq_jobs - tail_jobs;

			/* Copy the jobs from the end of the old array */
			(void) memcpy((void *) p,
			    (void *) &(refreshq[refreshq_next_job]),
			    tail_jobs * sizeof (refreshq_job_t));

			/* Copy the jobs from the beginning of the old array */
			(void) memcpy((void *) &(p[tail_jobs]),
			    (void *) &(refreshq[0]),
			    head_jobs * sizeof (refreshq_job_t));
		}

		/*
		 * Either way the pending jobs now occupy p[0] through
		 * p[n_refreshq_jobs - 1], so the indices must say so.
		 */
		refreshq_next_job = 0;
		refreshq_next_slot = n_refreshq_jobs;
	} else {
		/* First initialization, or nothing pending to copy */
		refreshq_next_job = 0;
		refreshq_next_slot = 0;
		n_refreshq_jobs = 0;
	}

	free((void *) refreshq);

	refreshq = p;
	n_refreshq_slots = count;

	return (0);
}

/*
 * Queue a cache refresh job described by (smd, oidstrs, n_oids, row).
 *
 * Returns 0 if the job was queued, or if a pending job with the same row,
 * n_oids and oidstrs pointer is already in the queue.  Returns -1 if the
 * queue has not been set up or if it is full and cannot be expanded.
 */
static int
refreshq_add_job(struct picl_snmphdl *smd, char *oidstrs, int n_oids, int row)
{
	int	scanned;
	int	idx;
	int	rc = 0;

	(void) mutex_lock(&refreshq_lock);

	/*
	 * No queue means the client never initialized the refresh code (or
	 * the initial allocation failed); there is nothing we can do.
	 */
	if (refreshq == NULL) {
		rc = -1;
		goto out;
	}

	/*
	 * Walk the pending jobs, oldest first.  If one of them already
	 * describes this entry, the entry was allowed to expire and is being
	 * reloaded; the existing job will take care of it, so we don't queue
	 * a second one.
	 */
	scanned = 0;
	idx = refreshq_next_job;
	while (scanned < n_refreshq_jobs) {
		if ((refreshq[idx].row == row) &&
		    (refreshq[idx].n_oids == n_oids) &&
		    (refreshq[idx].oidstrs == oidstrs)) {
			goto out;
		}
		scanned++;
		idx = (idx + 1) % n_refreshq_slots;
	}

	/*
	 * Grow the queue if every slot is in use.  If that fails the job is
	 * simply dropped: no data is lost, some mibcache data is just
	 * allowed to expire.
	 */
	if ((n_refreshq_jobs == n_refreshq_slots) &&
	    (refreshq_realloc(n_refreshq_slots + 1) < 0)) {
		rc = -1;
		goto out;
	}

	/*
	 * Fill in the next free slot.  The timestamp recorded here is a
	 * little earlier than the moment the mibcache entry actually gets
	 * updated, which is fine since the goal is to refresh the entry
	 * before it expires anyway.
	 */
	refreshq[refreshq_next_slot].row = row;
	refreshq[refreshq_next_slot].n_oids = n_oids;
	refreshq[refreshq_next_slot].oidstrs = oidstrs;
	refreshq[refreshq_next_slot].smd = smd;
	refreshq[refreshq_next_slot].last_fetch_time = GET_SCALED_HRTIME();

	/* Account for the new job and advance to the following slot */
	refreshq_next_slot = (refreshq_next_slot + 1) % n_refreshq_slots;
	n_refreshq_jobs++;

out:
	(void) mutex_unlock(&refreshq_lock);

	return (rc);
}

/*
 * Set up the refresh queue on behalf of a client.  Until this has been
 * called the refresh code stays dormant: fetch_bulk() still calls
 * refreshq_add_job(), but the latter returns without queueing anything.
 *
 * Returns the result of the initial queue allocation (0 on success, -1 on
 * failure).
 */
int
snmp_refresh_init(void)
{
	int status;

	(void) mutex_lock(&refreshq_lock);
	status = refreshq_realloc(0);
	(void) mutex_unlock(&refreshq_lock);

	return (status);
}

/*
 * Tear down the refresh queue.  Called when the client is going away so
 * that no further refresh work is attempted: the job array is released and
 * all of the queue bookkeeping is reset to its empty state.
 */
void
snmp_refresh_fini(void)
{
	(void) mutex_lock(&refreshq_lock);

	/* Release the job array itself ... */
	free(refreshq);
	refreshq = NULL;

	/* ... and forget everything we knew about its contents */
	refreshq_next_slot = 0;
	refreshq_next_job = 0;
	n_refreshq_slots = 0;
	n_refreshq_jobs = 0;

	(void) mutex_unlock(&refreshq_lock);
}

/*
 * Report how many seconds remain before the mibcache entry belonging to the
 * oldest queued job expires.
 *
 * Timestamps are kept internally as scaled hrtime values; the scaling is
 * undone here (by multiplying with HRTIME_SCALE) so that clients never see
 * it.
 *
 * Returns -1 if the queue is empty, 0 if the next job's entry has already
 * expired, and the remaining time otherwise.
 */
int
snmp_refresh_get_next_expiration(void)
{
	int remaining = -1;
	int age;

	(void) mutex_lock(&refreshq_lock);

	if (n_refreshq_jobs != 0) {
		age = GET_SCALED_HRTIME() -
		    refreshq[refreshq_next_job].last_fetch_time;

		if (age < MAX_INCACHE_TIME)
			remaining = (MAX_INCACHE_TIME - age) * HRTIME_SCALE;
		else
			remaining = 0;
	}

	(void) mutex_unlock(&refreshq_lock);

	return (remaining);
}

/*
 * Suggest how many jobs the client should process per cycle, given the
 * number of seconds it wants to spend on each cycle of processing jobs and
 * then sleeping.
 *
 * Conceptually the answer is (cycle time / MAX_INCACHE_TIME) * total jobs in
 * the queue; the multiplication is done first so that fractional values are
 * not lost to integer arithmetic.  The suggestion is capped at the number of
 * jobs currently queued.
 */
int
snmp_refresh_get_cycle_hint(int secs)
{
	int	scaled_secs;
	int	suggestion;

	/*
	 * Convert the client's cycle time into the scaled units used
	 * internally, rounding up: it is better for the client to process a
	 * few extra jobs than too few.
	 */
	scaled_secs = (secs + HRTIME_SCALE - 1) / HRTIME_SCALE;

	(void) mutex_lock(&refreshq_lock);

	suggestion = (n_refreshq_jobs * scaled_secs) / MAX_INCACHE_TIME;

	/*
	 * A cycle longer than MAX_INCACHE_TIME would ask for more jobs than
	 * exist; in that case just report the current total.
	 */
	if (suggestion > n_refreshq_jobs) {
		suggestion = n_refreshq_jobs;
	}

	(void) mutex_unlock(&refreshq_lock);

	return (suggestion);
}

/*
 * Take the oldest job off the refresh queue and run it by calling
 * fetch_bulk() with the parameters recorded in the job.
 *
 * Returns -1 if the queue was empty and nothing was done, or 0 if a job was
 * processed.  The outcome of fetch_bulk() itself is deliberately ignored:
 * this is only an attempt to refresh a cache entry, and the worst that a
 * failure can cause is a longer delay the next time that data is requested.
 */
int
snmp_refresh_process_job(void)
{
	struct picl_snmphdl	*job_smd;
	char			*job_oidstrs;
	int			job_n_oids;
	int			job_row;
	int			fetch_err;

	(void) mutex_lock(&refreshq_lock);

	if (n_refreshq_jobs == 0) {
		(void) mutex_unlock(&refreshq_lock);
		return (-1);
	}

	/* Snapshot the job's parameters while we still hold the lock */
	job_row = refreshq[refreshq_next_job].row;
	job_n_oids = refreshq[refreshq_next_job].n_oids;
	job_oidstrs = refreshq[refreshq_next_job].oidstrs;
	job_smd = refreshq[refreshq_next_job].smd;

	/* Remove the job from the queue */
	n_refreshq_jobs--;
	refreshq_next_job = (refreshq_next_job + 1) % n_refreshq_slots;

	(void) mutex_unlock(&refreshq_lock);

	/*
	 * The lock must be dropped before this call: fetch_bulk() comes
	 * straight back into the refresh code to queue a new job for the
	 * entry it has just loaded.
	 */
	fetch_bulk(job_smd, job_oidstrs, job_n_oids, job_row, 1, &fetch_err);

	return (0);
}