apix_irm.c (revision 7ff178cd8db129d385d3177eb20744d3b6efc59b) - OpenGrok cross reference for /titanic_41/usr/src/uts/i86pc/io/apix/apix_irm.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/psm_types.h>
#include <sys/smp_impldefs.h>
#include <sys/apic.h>
#include <sys/processor.h>
#include <sys/apix_irm_impl.h>

/*
 * Global variable for the static default allocation limit applied to
 * non-IRM drivers. It is defined outside this file; apix_irm_init()
 * raises it to at least DDI_MIN_MSIX_ALLOC once the IRM pool exists.
 */
extern int ddi_msix_alloc_limit;

/*
 * Extern declarations.
 *
 * psm_intr_ops is the PSM interrupt-operations entry point. When the
 * IRM pool is created, apix_irm_init() saves the current value in
 * psm_intr_ops_saved and points psm_intr_ops at apix_irm_intr_ops().
 */
extern int (*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
    psm_intr_op_t, int *);

/*
 * Global variables for IRM pool configuration:
 *
 *    (1) apix_system_max_vectors -- this would limit the maximum
 *    number of interrupt vectors that will be made available
 *    to the device drivers. The default value (-1) indicates
 *    that all the available vectors could be used.
 *
 *    (2) apix_irm_cpu_factor -- This would specify the number of CPUs that
 *    should be excluded from the global IRM pool of interrupt vectors.
 *    By default this would be zero, so vectors from all the CPUs
 *    present will be factored into the IRM pool.
 *
 *    (3) apix_irm_reserve_fixed_vectors -- This would specify the number
 *    of vectors that should be reserved for FIXED type interrupts and
 *    exclude them from the IRM pool. The value can be one of the
 *    following:
 *	0	- no reservation (default)
 *	<n>	- a positive number for the reserved cache
 *	-1	- reserve the maximum needed
 *
 *    (4) apix_irm_free_fixed_vector -- This flag specifies if the
 *    vectors for FIXED type should be freed and added back
 *    to the IRM pool when ddi_intr_free() is called. The default
 *    (1) is to add it back to the pool; when it is 0 the freed
 *    vector is kept in the local cache instead.
 */
int apix_system_max_vectors = -1;
int apix_irm_cpu_factor = 0;
int apix_irm_reserve_fixed_vectors = 0;
int apix_irm_free_fixed_vector = 1;

/* info from APIX module for IRM configuration */
apix_irm_info_t apix_irminfo;

kmutex_t apix_irm_lock; /* global mutex for apix_irm_* data */
ddi_irm_params_t apix_irm_params; /* IRM pool info */
int apix_irm_cache_size = 0; /* local cache for FIXED type requests */
/*
 * Portion of apix_irm_cpu_factor that is currently available: set to
 * apix_irm_cpu_factor by apix_irm_init() when the factor is applied,
 * decremented by apix_irm_disable_intr() instead of shrinking the pool,
 * and incremented again by apix_irm_enable_intr().
 */
int apix_irm_cpu_factor_available = 0;
/*
 * CPU count enabled for interrupts: initialized by apix_irm_init(),
 * decremented on a successful apix_irm_disable_intr() and incremented
 * by apix_irm_enable_intr().
 */
int apix_irm_max_cpus = 0;
/* CPU count whose vectors are currently accounted for in the IRM pool */
int apix_irm_cpus_used = 0;
/*
 * Vectors consumed outside the IRM pool: seeded from
 * apix_irminfo.apix_vectors_allocated by apix_irm_init() and then
 * adjusted by the FIXED alloc/free paths.
 */
int apix_irm_fixed_intr_vectors_used;

/* CPU count defined outside this file; caps apix_ncpus in apix_irm_init() */
extern int ncpus;

/*
 * Local data/functions.
 *
 * apix_irm_chk_apix() is declared with an explicit (void) parameter list
 * so that the declaration is a real prototype matching its definition.
 */
static int apix_irm_chk_apix(void);
int apix_irm_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
	psm_intr_op_t op, int *result);
int apix_irm_disable_intr(processorid_t);
void apix_irm_enable_intr(processorid_t);

/*
 * Original PSM entry points, saved by apix_irm_init() before it installs
 * the apix_irm_* interceptors. They stay NULL until the IRM pool has been
 * created successfully.
 */
int (*psm_intr_ops_saved)(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
	psm_intr_op_t op, int *result) = NULL;
int (*psm_disable_intr_saved)(processorid_t) = NULL;
void (*psm_enable_intr_saved)(processorid_t) = NULL;

/* FIXED type alloc/free handlers invoked from apix_irm_intr_ops() */
int apix_irm_alloc_fixed(dev_info_t *, ddi_intr_handle_impl_t *, int *);
int apix_irm_free_fixed(dev_info_t *, ddi_intr_handle_impl_t *, int *);

/*
 * Initialize the IRM pool for APIC interrupts if the PSM module
 * is of APIX type. This should be called only after the PSM module
 * is loaded and the APIC interrupt system is initialized.
 *
 * The pool size is derived from the CPU count and per-CPU vector count
 * reported in apix_irminfo, reduced by the vectors already allocated,
 * capped by apix_system_max_vectors and reduced again by the local cache
 * reserved for FIXED interrupts. On success the PSM entry points
 * psm_intr_ops, psm_enable_intr and psm_disable_intr are redirected to
 * the apix_irm_* interceptors in this file. If any step fails the
 * function returns without installing the interceptors.
 */
void
apix_irm_init(void)
{
	dev_info_t		*dip;
	int			total_avail_vectors;
	int			cpus_used;
	int			cache_size;

	/* nothing to do if IRM is disabled */
	if (!irm_enable)
		return;

	/*
	 * Use root devinfo node to associate the IRM pool with it
	 * as the pool is global to the system.
	 */
	dip = ddi_root_node();

	/*
	 * Check if PSM module is initialized and it is APIX
	 * module (which supports IRM functionality).
	 */
	if ((psm_intr_ops == NULL) || !apix_irm_chk_apix()) {
		/* not an APIX module */
		APIX_IRM_DEBUG((CE_CONT,
		    "apix_irm_init: APIX module not present"));
		return;
	}

	/*
	 * Now, determine the IRM pool parameters based on the
	 * info from APIX module and global config variables.
	 */

	/*
	 * apix_ncpus shows all the CPUs present in the
	 * system but not all of them may have been enabled
	 * (i.e. mp_startup() may not have been called yet).
	 * So, use ncpus for IRM pool creation.
	 */
	if (apix_irminfo.apix_ncpus > ncpus)
		apix_irminfo.apix_ncpus = ncpus;

	/* apply the CPU factor if possible */
	if ((apix_irm_cpu_factor > 0) &&
	    (apix_irminfo.apix_ncpus > apix_irm_cpu_factor)) {
		cpus_used = apix_irminfo.apix_ncpus - apix_irm_cpu_factor;
		apix_irm_cpu_factor_available = apix_irm_cpu_factor;
	} else {
		cpus_used = apix_irminfo.apix_ncpus;
	}
	apix_irm_cpus_used = apix_irm_max_cpus = cpus_used;

	APIX_IRM_DEBUG((CE_CONT,
	    "apix_irm_init: %d CPUs used for IRM pool size", cpus_used));

	total_avail_vectors = cpus_used * apix_irminfo.apix_per_cpu_vectors -
	    apix_irminfo.apix_vectors_allocated;

	apix_irm_fixed_intr_vectors_used = apix_irminfo.apix_vectors_allocated;

	if (total_avail_vectors <= 0) {
		/* can not determine pool size */
		APIX_IRM_DEBUG((CE_NOTE,
		    "apix_irm_init: can not determine pool size"));
		return;
	}

	/* adjust the pool size as per the global config variable */
	if ((apix_system_max_vectors > 0) &&
	    (apix_system_max_vectors < total_avail_vectors))
		total_avail_vectors = apix_system_max_vectors;

	/* pre-reserve vectors (i.e. local cache) for FIXED type if needed */
	if (apix_irm_reserve_fixed_vectors != 0) {
		cache_size = apix_irm_reserve_fixed_vectors;
		/*
		 * -1 requests the maximum. Any other negative value is
		 * treated the same way: taken literally it would enlarge
		 * the pool and leave a negative cache size behind.
		 */
		if ((cache_size < 0) ||
		    (cache_size > apix_irminfo.apix_ioapic_max_vectors))
			cache_size = apix_irminfo.apix_ioapic_max_vectors;
		total_avail_vectors -= cache_size;
		apix_irm_cache_size = cache_size;
	}

	if (total_avail_vectors <= 0) {
		APIX_IRM_DEBUG((CE_NOTE,
		    "apix_irm_init: invalid config parameters!"));
		return;
	}

	/* IRM pool is used only for MSI/X interrupts */
	apix_irm_params.iparams_types = DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX;
	apix_irm_params.iparams_total = total_avail_vectors;

	if (ndi_irm_create(dip, &apix_irm_params,
	    &apix_irm_pool_p) == NDI_SUCCESS) {
		/*
		 * The interceptors installed below all take apix_irm_lock,
		 * so the lock must be usable before any of them can be
		 * reached through the redirected PSM entry points.
		 */
		mutex_init(&apix_irm_lock, NULL, MUTEX_DRIVER, NULL);

		/*
		 * re-direct psm_intr_ops to intercept FIXED
		 * interrupt allocation requests.
		 */
		psm_intr_ops_saved = psm_intr_ops;
		psm_intr_ops = apix_irm_intr_ops;
		/*
		 * re-direct psm_enable_intr()/psm_disable_intr() to
		 * intercept CPU offline/online requests.
		 */
		psm_disable_intr_saved = psm_disable_intr;
		psm_enable_intr_saved = psm_enable_intr;
		psm_enable_intr = apix_irm_enable_intr;
		psm_disable_intr = apix_irm_disable_intr;

		/*
		 * Set default alloc limit for non-IRM drivers
		 * to DDI_MIN_MSIX_ALLOC (currently defined as 8).
		 *
		 * NOTE: This is done here so that the limit of 8 vectors
		 * is applicable only with APIX module. For the old pcplusmp
		 * implementation, the current default of 2 (i.e
		 * DDI_DEFAULT_MSIX_ALLOC) is retained.
		 */
		if (ddi_msix_alloc_limit < DDI_MIN_MSIX_ALLOC)
			ddi_msix_alloc_limit = DDI_MIN_MSIX_ALLOC;
	} else {
		APIX_IRM_DEBUG((CE_NOTE,
		    "apix_irm_init: ndi_irm_create() failed"));
		apix_irm_pool_p = NULL;
	}
}

/*
 * Ask the PSM module for its APIC type and report whether it is the
 * "APIX" flavour, which is the one that supports the IRM feature.
 *
 * Returns 1 when the reported type name equals APIC_APIX_NAME and 0 in
 * every other case (no PSM ops installed, the query failed, or the name
 * differs).
 */
static int
apix_irm_chk_apix(void)
{
	apic_get_type_t		apic_type;
	ddi_intr_handle_impl_t	hdl;
	int			rv;

	if (psm_intr_ops == NULL)
		return (0);

	/* the PSM op hands its answer back through ih_private */
	bzero(&hdl, sizeof (hdl));
	hdl.ih_private = &apic_type;

	rv = (*psm_intr_ops)(NULL, &hdl, PSM_INTR_OP_APIC_TYPE, NULL);
	if (rv != PSM_SUCCESS) {
		/* unknown type; assume not an APIX module */
		return (0);
	}

	return ((strcmp(apic_type.avgi_type, APIC_APIX_NAME) == 0) ? 1 : 0);
}

/*
 * Interceptor installed in place of psm_intr_ops so that IRM pool
 * maintenance can be done for FIXED type interrupts.
 *
 * Only these two requests are handled here, and only when the handle
 * describes a FIXED interrupt:
 *	PSM_INTR_OP_ALLOC_VECTORS	-> apix_irm_alloc_fixed()
 *	PSM_INTR_OP_FREE_VECTORS	-> apix_irm_free_fixed()
 * Everything else is forwarded unchanged to the saved psm_intr_ops
 * function, and its return value is returned to the caller.
 */
int
apix_irm_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
	psm_intr_op_t op, int *result)
{
	/* the handle is examined only for the two intercepted requests */
	if (op == PSM_INTR_OP_ALLOC_VECTORS) {
		if (handle->ih_type == DDI_INTR_TYPE_FIXED)
			return (apix_irm_alloc_fixed(dip, handle, result));
	} else if (op == PSM_INTR_OP_FREE_VECTORS) {
		if (handle->ih_type == DDI_INTR_TYPE_FIXED)
			return (apix_irm_free_fixed(dip, handle, result));
	}

	/* pass the request to APIX */
	return ((*psm_intr_ops_saved)(dip, handle, op, result));
}

/*
 * Allocate a FIXED type interrupt. The procedure for this
 * operation is as follows:
 *
 * 1) Check if this IRQ is shared (i.e. IRQ is already mapped
 *    and a vector has been already allocated). If so, then no
 *    new vector is needed and simply pass the request to APIX
 *    and return.
 * 2) Check the local cache pool for an available vector. If
 *    the cache is not empty then take it from there and
 *    pass the request to APIX.
 * 3) Otherwise, get a vector from the IRM pool by reducing the
 *    pool size by 1. If it is successful then pass the
 *    request to APIX module. Otherwise return PSM_FAILURE.
 *
 * In cases 2 and 3, if APIX then fails the request, the vector is handed
 * back to wherever it was taken from so the accounting does not drift.
 *
 * Returns PSM_FAILURE when no vector can be obtained from the IRM pool;
 * otherwise returns whatever the saved psm_intr_ops function returns for
 * PSM_INTR_OP_ALLOC_VECTORS.
 */
int
apix_irm_alloc_fixed(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
	int *result)
{
	int	vector;
	uint_t	new_pool_size;
	int	from_cache;
	int	ret;

	/*
	 * Check if this IRQ has been mapped (i.e. shared IRQ case)
	 * by doing PSM_INTR_OP_XLATE_VECTOR.
	 */
	ret = (*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_XLATE_VECTOR,
	    &vector);
	if (ret == PSM_SUCCESS) {
		APIX_IRM_DEBUG((CE_CONT,
		    "apix_irm_alloc_fixed: dip %p (%s) xlated vector 0x%x",
		    (void *)dip, ddi_driver_name(dip), vector));
		/* (1) mapping already exists; pass the request to PSM */
		return ((*psm_intr_ops_saved)(dip, handle,
		    PSM_INTR_OP_ALLOC_VECTORS, result));
	}

	mutex_enter(&apix_irm_lock);
	if (apix_irm_cache_size > 0) {
		/* (2) use the vector from the local cache */
		--apix_irm_cache_size;
		from_cache = 1;
	} else {
		/* (3) get a vector from the IRM pool */

		/*
		 * An empty pool has nothing to give; subtracting 1 would
		 * wrap the unsigned size around to a huge value.
		 */
		if (apix_irm_params.iparams_total == 0) {
			mutex_exit(&apix_irm_lock);
			return (PSM_FAILURE);
		}

		new_pool_size = apix_irm_params.iparams_total - 1;

		APIX_IRM_DEBUG((CE_CONT, "apix_irm_alloc_fixed: dip %p (%s) "
		    "resize pool from %x to %x\n", (void *)dip,
		    ddi_driver_name(dip), apix_irm_pool_p->ipool_totsz,
		    new_pool_size));

		if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) !=
		    NDI_SUCCESS) {
			mutex_exit(&apix_irm_lock);
			return (PSM_FAILURE);
		}

		/* update the pool size info */
		apix_irm_params.iparams_total = new_pool_size;
		from_cache = 0;
	}
	apix_irm_fixed_intr_vectors_used++;
	mutex_exit(&apix_irm_lock);

	/* the lock is not held across the call into APIX */
	ret = (*psm_intr_ops_saved)(dip, handle,
	    PSM_INTR_OP_ALLOC_VECTORS, result);
	if (ret == PSM_SUCCESS)
		return (ret);

	/*
	 * APIX did not allocate a vector, so undo the reservation made
	 * above.
	 * NOTE(review): assumes a non-PSM_SUCCESS return means that no
	 * vector was consumed by APIX -- confirm against the APIX module.
	 */
	mutex_enter(&apix_irm_lock);
	apix_irm_fixed_intr_vectors_used--;
	if (from_cache) {
		apix_irm_cache_size++;
	} else {
		new_pool_size = apix_irm_params.iparams_total + 1;
		if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) ==
		    NDI_SUCCESS) {
			apix_irm_params.iparams_total = new_pool_size;
		} else {
			cmn_err(CE_NOTE,
			    "apix_irm_alloc_fixed: failed to return"
			    " a vector to IRM pool");
		}
	}
	mutex_exit(&apix_irm_lock);

	return (ret);
}

/*
 * Release a FIXED type interrupt and account for its vector.
 *
 * 1) If APIX reports the vector as shared, the free request is simply
 *    forwarded to APIX; no accounting changes are made.
 * 2) Otherwise the free request is forwarded and, when APIX succeeds,
 *    the vector is either added back to the IRM pool (when
 *    apix_irm_free_fixed_vector is set, which is the default) or kept
 *    in the local cache (when the flag is clear). In both cases the
 *    count of FIXED vectors in use is decremented.
 *
 * Returns the value returned by the saved psm_intr_ops function for
 * PSM_INTR_OP_FREE_VECTORS.
 */
int
apix_irm_free_fixed(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
	int *result)
{
	uint_t	grown_size;
	int	share_cnt;
	int	rv;

	/* ask APIX whether other users still share this vector */
	rv = (*psm_intr_ops_saved)(dip, handle,
	    PSM_INTR_OP_GET_SHARED, &share_cnt);

	if ((rv == PSM_SUCCESS) && (share_cnt > 0)) {
		/* (1) it is a shared vector; simply pass the request */
		APIX_IRM_DEBUG((CE_CONT, "apix_irm_free_fixed: dip %p (%s) "
		    "shared %d\n", (void *)dip, ddi_driver_name(dip),
		    share_cnt));
		return ((*psm_intr_ops_saved)(dip, handle,
		    PSM_INTR_OP_FREE_VECTORS, result));
	}

	rv = (*psm_intr_ops_saved)(dip, handle,
	    PSM_INTR_OP_FREE_VECTORS, result);

	/* nothing to account for unless APIX actually freed the vector */
	if (rv != PSM_SUCCESS)
		return (rv);

	mutex_enter(&apix_irm_lock);
	if (!apix_irm_free_fixed_vector) {
		/* keep the vector in the local cache */
		apix_irm_cache_size += 1;
	} else {
		/* (2) add the vector back to IRM pool */
		grown_size = apix_irm_params.iparams_total + 1;
		APIX_IRM_DEBUG((CE_CONT, "apix_irm_free_fixed: "
		    "dip %p (%s) resize pool from %x to %x\n",
		    (void *)dip, ddi_driver_name(dip),
		    apix_irm_pool_p->ipool_totsz, grown_size));
		if (ndi_irm_resize_pool(apix_irm_pool_p,
		    grown_size) != NDI_SUCCESS) {
			cmn_err(CE_NOTE,
			    "apix_irm_free_fixed: failed to add"
			    " a vector to IRM pool");
		} else {
			/* update the pool size info */
			apix_irm_params.iparams_total = grown_size;
		}
	}
	apix_irm_fixed_intr_vectors_used--;
	mutex_exit(&apix_irm_lock);

	return (rv);
}

/*
 * Disable the CPU for interrupts. It is assumed that this is called to
 * offline/disable the CPU so that no interrupts are allocated on
 * that CPU. For IRM perspective, the interrupt vectors on this
 * CPU are to be excluded for any allocations.
 *
 * The IRM pool is shrunk first (unless spare CPU factor absorbs the
 * removal) and the saved psm_disable_intr function is called afterwards.
 * If that call fails, the pool size and CPU accounting are restored to
 * what they were on entry, so a failed attempt leaves no trace.
 *
 * When the CPU is flagged APIC_CPU_SUSPEND the request is passed straight
 * through without touching the IRM accounting.
 *
 * Returns PSM_SUCCESS when the CPU was disabled for interrupts and
 * PSM_FAILURE otherwise; the suspend case returns whatever the saved
 * function returns.
 */
int
apix_irm_disable_intr(processorid_t id)
{
	uint_t	new_pool_size;
	uint_t	old_pool_size;
	int	used_cpu_factor = 0;

	/* Interrupt disabling for Suspend/Resume */
	if (apic_cpus[id].aci_status & APIC_CPU_SUSPEND)
		return ((*psm_disable_intr_saved)(id));

	mutex_enter(&apix_irm_lock);

	/* remembered so that a failed disable can be undone below */
	old_pool_size = apix_irm_params.iparams_total;

	/*
	 * Don't remove the CPU from the IRM pool if we have CPU factor
	 * available.
	 */
	if ((apix_irm_cpu_factor > 0) && (apix_irm_cpu_factor_available > 0)) {
		apix_irm_cpu_factor_available--;
		used_cpu_factor = 1;
	} else {
		/* can't disable if there is only one CPU used */
		if (apix_irm_cpus_used == 1) {
			mutex_exit(&apix_irm_lock);
			return (PSM_FAILURE);
		}
		/*
		 * Calculate the new size for the IRM pool.
		 * NOTE(review): this is unsigned arithmetic; if the pool
		 * is smaller than one CPU's worth of vectors the result
		 * wraps, and the MIN() below appears to be what brings it
		 * back into range when apix_system_max_vectors is set.
		 * Left as is -- confirm the intended behavior without a
		 * limit.
		 */
		new_pool_size = apix_irm_params.iparams_total -
		    apix_irminfo.apix_per_cpu_vectors;

		/* Apply the max. limit */
		if (apix_system_max_vectors > 0) {
			uint_t	max;

			max = apix_system_max_vectors -
			    apix_irm_fixed_intr_vectors_used -
			    apix_irm_cache_size;

			new_pool_size = MIN(new_pool_size, max);
		}

		if (new_pool_size == 0) {
			cmn_err(CE_WARN, "Invalid pool size 0 with "
			    "apix_system_max_vectors = %d",
			    apix_system_max_vectors);
			mutex_exit(&apix_irm_lock);
			return (PSM_FAILURE);
		}

		if (new_pool_size != apix_irm_params.iparams_total) {
			/* remove the CPU from the IRM pool */
			if (ndi_irm_resize_pool(apix_irm_pool_p,
			    new_pool_size) != NDI_SUCCESS) {
				mutex_exit(&apix_irm_lock);
				APIX_IRM_DEBUG((CE_NOTE,
				    "apix_irm_disable_intr: failed to resize"
				    " the IRM pool"));
				return (PSM_FAILURE);
			}
			/* update the pool size info */
			apix_irm_params.iparams_total = new_pool_size;
		}

		/* decrement the CPU count used by IRM pool */
		apix_irm_cpus_used--;
	}

	/*
	 * Now, disable the CPU for interrupts.
	 */
	if ((*psm_disable_intr_saved)(id) != PSM_SUCCESS) {
		APIX_IRM_DEBUG((CE_NOTE,
		    "apix_irm_disable_intr: failed to disable CPU interrupts"
		    " for CPU#%d", id));

		/*
		 * The CPU is still enabled for interrupts, so put back
		 * the accounting changed above. The lock has been held
		 * throughout, so nothing else has changed the pool size
		 * in the meantime.
		 */
		if (used_cpu_factor) {
			apix_irm_cpu_factor_available++;
		} else {
			apix_irm_cpus_used++;
			if (apix_irm_params.iparams_total != old_pool_size) {
				if (ndi_irm_resize_pool(apix_irm_pool_p,
				    old_pool_size) == NDI_SUCCESS) {
					apix_irm_params.iparams_total =
					    old_pool_size;
				} else {
					cmn_err(CE_NOTE,
					    "apix_irm_disable_intr: failed to"
					    " restore the IRM pool size");
				}
			}
		}
		mutex_exit(&apix_irm_lock);
		return (PSM_FAILURE);
	}
	/* decrement the CPU count enabled for interrupts */
	apix_irm_max_cpus--;
	mutex_exit(&apix_irm_lock);
	return (PSM_SUCCESS);
}

/*
 * Enable the CPU for interrupts. It is assumed that this function is
 * called to enable/online the CPU so that interrupts could be assigned
 * to it. If the configured apix_irm_cpu_factor is already satisfied, the
 * vectors of this CPU are added to the IRM pool (subject to the
 * apix_system_max_vectors limit); otherwise the CPU only replenishes the
 * available CPU factor.
 *
 * When the CPU is flagged APIC_CPU_SUSPEND the request is passed straight
 * through without touching the IRM accounting.
 *
 * The recorded pool size (apix_irm_params.iparams_total) is updated only
 * when the pool was actually resized.
 */
void
apix_irm_enable_intr(processorid_t id)
{
	uint_t new_pool_size;

	/* Interrupt enabling for Suspend/Resume */
	if (apic_cpus[id].aci_status & APIC_CPU_SUSPEND) {
		(*psm_enable_intr_saved)(id);
		return;
	}

	mutex_enter(&apix_irm_lock);

	/* enable the CPU for interrupts */
	(*psm_enable_intr_saved)(id);

	/* increment the number of CPUs enabled for interrupts */
	apix_irm_max_cpus++;

	ASSERT(apix_irminfo.apix_per_cpu_vectors > 0);

	/*
	 * Check if the apix_irm_cpu_factor is satisfied before.
	 * If satisfied, add the CPU to IRM pool.
	 */
	if ((apix_irm_cpu_factor > 0) &&
	    (apix_irm_cpu_factor_available < apix_irm_cpu_factor)) {
		/*
		 * Don't add the CPU to the IRM pool. Just update
		 * the available CPU factor.
		 */
		apix_irm_cpu_factor_available++;
		mutex_exit(&apix_irm_lock);
		return;
	}

	/*
	 * Add the CPU to the IRM pool.
	 */

	/* increment the CPU count used by IRM */
	apix_irm_cpus_used++;

	/* Calculate the new pool size */
	new_pool_size = apix_irm_params.iparams_total +
	    apix_irminfo.apix_per_cpu_vectors;

	/* Apply the max. limit */
	if (apix_system_max_vectors > 0) {
		uint_t	max;

		/*
		 * NOTE(review): unsigned arithmetic; if the vectors in use
		 * plus the cache exceed the limit this wraps to a large
		 * value and the limit is effectively not applied --
		 * confirm whether that is intended.
		 */
		max = apix_system_max_vectors -
		    apix_irm_fixed_intr_vectors_used -
		    apix_irm_cache_size;

		new_pool_size = MIN(new_pool_size, max);
	}
	if (new_pool_size == apix_irm_params.iparams_total) {
		/* no change to pool size */
		mutex_exit(&apix_irm_lock);
		return;
	}
	if (new_pool_size < apix_irm_params.iparams_total) {
		cmn_err(CE_WARN, "new_pool_size %d is inconsistent "
		    "with irm_params.iparams_total %d",
		    new_pool_size, apix_irm_params.iparams_total);
		mutex_exit(&apix_irm_lock);
		return;
	}

	/*
	 * Record the new size only if the pool really was resized;
	 * otherwise the bookkeeping here would disagree with the pool.
	 */
	if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) !=
	    NDI_SUCCESS) {
		cmn_err(CE_WARN, "apix_irm_enable_intr: failed to resize "
		    "the IRM pool for CPU#%d", id);
		mutex_exit(&apix_irm_lock);
		return;
	}

	/* update the pool size info */
	apix_irm_params.iparams_total = new_pool_size;

	mutex_exit(&apix_irm_lock);
}