seattle/os/seattle.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>

#include <sys/platform_module.h>
#include <sys/errno.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/rmc_comm_dp.h>
#include <sys/rmc_comm_drvintf.h>
#include <sys/modctl.h>
#include <sys/lgrp.h>
#include <sys/memnode.h>
#include <sys/promif.h>

/* Anything related to shared i2c access applies to Seattle only */
#define	SHARED_MI2CV_PATH "/i2c@1f,530000"
/* devinfo node held for SHARED_MI2CV_PATH; set in load_platform_drivers() */
static dev_info_t *shared_mi2cv_dip;
/* taken by plat_setprop_enter() and dropped by plat_setprop_exit() */
static kmutex_t mi2cv_mutex;

/*
 * Optional hook consulted by plat_get_mem_unum(); it is not assigned
 * anywhere in this file, so it is presumably installed by another
 * module -- TODO confirm.
 */
int (*p2get_mem_unum)(int, uint64_t, char *, int, int *);
static void cpu_sgn_update(ushort_t, uchar_t, uchar_t, int);
/*
 * Address of rmc_comm_request_nowait(), looked up in
 * load_platform_drivers() and used by plat_rmc_comm_req().
 */
int (*rmc_req_now)(rmc_comm_msg_t *, uint8_t) = NULL;

/*
 * Platform startup hook: initializes mi2cv_mutex, the lock that
 * plat_setprop_enter() acquires and plat_setprop_exit() releases.
 */
void
startup_platform(void)
{
	/* NOTE(review): every argument after the mutex is NULL -- confirm */
	mutex_init(&mi2cv_mutex, NULL, NULL, NULL);
}

/*
 * Platform hook for the number of spare TSBs.
 *
 * Takes no arguments and always returns 0 on this platform.  The
 * parameter list is spelled (void), like the other zero-argument hooks
 * in this file, so the definition also serves as a prototype.
 */
int
set_platform_tsb_spares(void)
{
	return (0);
}

/*
 * Install this platform's defaults: the cpu signature handler and,
 * unless a tod module name has already been chosen, the "todm5823"
 * tod module.
 */
void
set_platform_defaults(void)
{
	extern char *tod_module_name;

	/* Route signature updates through this file's handler. */
	cpu_sgn_func = cpu_sgn_update;

	/* Only supply a tod module when none has been selected yet. */
	if (tod_module_name == NULL) {
		tod_module_name = "todm5823";
	}
}

/*
 * These two dummy functions are loaded over the original
 * todm5823 set_power_alarm and clear_power_alarm functions. On Seattle
 * these functions are not supported, and thus we need to provide
 * dummy functions that just return.
 * On Seattle, the clock chip is not persistent across reboots,
 * and moreover it has a bug sending memory access.
 * This fix is done by writing over the original
 * tod_ops function pointers with our dummy replacement functions.
 */
/*
 * No-op stand-in for the tod set_power_alarm operation; installed into
 * tod_ops by load_platform_drivers().  The ts argument is ignored.
 */
/*ARGSUSED*/
static void
dummy_todm5823_set_power_alarm(timestruc_t ts)
{
	/* intentionally empty */
}

/*
 * No-op stand-in for the tod clear_power_alarm operation; installed
 * into tod_ops by load_platform_drivers().
 */
static void
dummy_todm5823_clear_power_alarm(void)
{
	/* intentionally empty */
}

/*
 * Definitions for accessing the pci config space of the isa node
 * of Southbridge.
 *
 * NOTE(review): nothing in the visible part of this file assigns
 * isa_handle, so plat_ide_chipreset() sees it as NULL -- confirm
 * whether a setter is expected.
 */
static ddi_acc_handle_t isa_handle = NULL;	/* handle for isa pci space */

/*
 * Definition for accessing rmclomv: the device path that
 * load_platform_drivers() passes to e_ddi_hold_devi_by_path().
 */
#define	RMCLOMV_PATHNAME	"/pseudo/rmclomv@0"

/*
 * Attach and hold the drivers this platform depends on, record the
 * shared mi2cv devinfo node, replace the tod power-alarm operations
 * with no-op versions, and look up rmc_comm_request_nowait() for
 * later use through rmc_req_now.
 *
 * Attach failures are reported with cmn_err(CE_WARN, ...) where noted
 * below; none of them stops the remaining steps from running.
 */
void
load_platform_drivers(void)
{
	extern int watchdog_enable, watchdog_available;
	extern int disable_watchdog_on_exit;

	/*
	 * The 'us' driver is not available in all clusters (e.g. it is
	 * missing in the Core cluster), so a failure here is ignored.
	 */
	(void) i_ddi_attach_hw_nodes("us");

	/*
	 * mc-us3i must stay loaded for plat_get_mem_unum(); the hold is
	 * requested whether or not the attach reported success.
	 */
	if (i_ddi_attach_hw_nodes("mc-us3i") != DDI_SUCCESS) {
		cmn_err(CE_WARN, "mc-us3i driver failed to install");
	}
	(void) ddi_hold_driver(ddi_name_to_major("mc-us3i"));

	/*
	 * Power button driver: held only after a successful attach.
	 */
	if (i_ddi_attach_hw_nodes("power") == DDI_SUCCESS) {
		(void) ddi_hold_driver(ddi_name_to_major("power"));
	} else {
		cmn_err(CE_WARN, "power button driver failed to install");
	}

	/*
	 * GPIO driver for the ALOM reset and watchdog lines.  The
	 * watchdog variables are only set when the attach succeeded;
	 * the hold is requested in either case.
	 */
	if (i_ddi_attach_hw_nodes("pmugpio") == DDI_SUCCESS) {
		/* Disable an active h/w watchdog timer upon exit to OBP. */
		disable_watchdog_on_exit = 1;

		watchdog_enable = 1;
		watchdog_available = 1;
	} else {
		cmn_err(CE_WARN, "pmugpio failed to install");
	}
	(void) ddi_hold_driver(ddi_name_to_major("pmugpio"));

	/*
	 * Figure out which mi2cv dip is shared with OBP for the nvram
	 * device, so the lock can be acquired.
	 */
	shared_mi2cv_dip = e_ddi_hold_devi_by_path(SHARED_MI2CV_PATH, 0);

	/*
	 * Load the environmentals driver (rmclomv), which handles events
	 * from the RMC when state changes occur in the environmental
	 * data.  It is only attempted once rmc_comm has attached.
	 */
	if (i_ddi_attach_hw_nodes("rmc_comm") == DDI_SUCCESS) {
		(void) ddi_hold_driver(ddi_name_to_major("rmc_comm"));

		if (e_ddi_hold_devi_by_path(RMCLOMV_PATHNAME, 0) == NULL) {
			cmn_err(CE_WARN, "Could not install rmclomv driver\n");
		}
	} else {
		cmn_err(CE_WARN, "rmc_comm failed to install");
	}

	/*
	 * Power alarms are not supported on Seattle.  This routine is
	 * called from post_startup(), after the tod module has finished
	 * initializing, so the two tod_ops entries can now be replaced
	 * with the dummy versions.
	 */
	tod_ops.tod_set_power_alarm = dummy_todm5823_set_power_alarm;
	tod_ops.tod_clear_power_alarm = dummy_todm5823_clear_power_alarm;

	/*
	 * Create a handle to the rmc_comm_request_nowait() function
	 * inside the rmc_comm module.  The Seattle/Boston todm5823
	 * driver uses this handle to send time/date updates to ALOM.
	 */
	rmc_req_now = (int (*)(rmc_comm_msg_t *, uint8_t))
	    modgetsymvalue("rmc_comm_request_nowait", 0);
}

/*
 * This routine is needed if a device error or timeout occurs before the
 * driver is loaded.
 *
 * Both arguments are currently unused.  Returns DDI_FAILURE while
 * isa_handle is NULL and DDI_SUCCESS otherwise; no reset is actually
 * carried out yet.
 */
/*ARGSUSED*/
int
plat_ide_chipreset(dev_info_t *dip, int chno)
{
	/*
	 * Stub: the reset logic for the ULI1573 will be filled in here
	 * when it becomes available.
	 */
	return ((isa_handle == NULL) ? DDI_FAILURE : DDI_SUCCESS);
}


/*
 * CPU power-on hook.  The cp argument is ignored and ENOTSUP is always
 * returned.
 */
/*ARGSUSED*/
int
plat_cpu_poweron(struct cpu *cp)
{
	return (ENOTSUP);	/* not supported on this platform */
}

/*
 * CPU power-off hook.  The cp argument is ignored and ENOTSUP is always
 * returned.
 */
/*ARGSUSED*/
int
plat_cpu_poweroff(struct cpu *cp)
{
	return (ENOTSUP);	/* not supported on this platform */
}

/*
 * Freelist processing hook; nothing is done for mnode on this platform.
 */
/*ARGSUSED*/
void
plat_freelist_process(int mnode)
{
	/* intentionally empty */
}

/*
 * NULL-terminated list of platform module names.
 */
char *platform_module_list[] = {
	"mi2cv",
	"pca9556",
	NULL
};

/*
 * TOD fault hook; the reported fault type is ignored on this platform.
 */
/*ARGSUSED*/
void
plat_tod_fault(enum tod_fault_type tod_bad)
{
	/* intentionally empty */
}

/*
 * Resolve a memory fault address to a unum string.
 *
 * The request is forwarded to the p2get_mem_unum hook, with flt_addr
 * aligned down to an 8-byte boundary, and the hook's return value is
 * passed back.  ENOTSUP is returned when the fault is not in memory or
 * no hook is installed.  flt_bus_id and flt_status are unused.
 */
/*ARGSUSED*/
int
plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
    int flt_in_memory, ushort_t flt_status, char *buf, int buflen, int *lenp)
{
	if (!flt_in_memory || p2get_mem_unum == NULL) {
		return (ENOTSUP);
	}

	return (p2get_mem_unum(synd_code, P2ALIGN(flt_addr, 8),
	    buf, buflen, lenp));
}

/*
 * This platform hook gets called from mc_add_mem_unum_label() in the mc-us3i
 * driver giving each platform the opportunity to add platform
 * specific label information to the unum for ECC error logging purposes.
 *
 * On entry unum holds the existing label; on return it holds
 *	"MB/P<mcid>/B<bank>[/D<dimm>]: <existing label>"
 * where the "/D<dimm>" part is left out when dimm is -1.
 *
 * unum is treated as a buffer of UNUM_NAMLEN bytes.  The result is
 * produced by one bounded snprintf() call, so it is truncated to fit
 * rather than written past the end of the buffer, and the copy of the
 * incoming label is bounded by the size of the local buffer.
 */
/*ARGSUSED*/
void
plat_add_mem_unum_label(char *unum, int mcid, int bank, int dimm)
{
	char old_unum[UNUM_NAMLEN];

	/* Snapshot the incoming label before unum is overwritten. */
	(void) snprintf(old_unum, sizeof (old_unum), "%s", unum);

	if (dimm != -1) {
		(void) snprintf(unum, UNUM_NAMLEN, "MB/P%d/B%d/D%d: %s",
		    mcid, bank, dimm, old_unum);
	} else {
		(void) snprintf(unum, UNUM_NAMLEN, "MB/P%d/B%d: %s",
		    mcid, bank, old_unum);
	}
}

/*
 * Write the cpu unum, which is always "MB" here, into buf.
 *
 * Returns 0 and stores the string length in *lenp when the string
 * fits in buflen bytes; returns ENOSPC, leaving *lenp untouched, when
 * it does not.  cpuid is unused.
 */
/*ARGSUSED*/
int
plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	if (snprintf(buf, buflen, "MB") >= buflen) {
		return (ENOSPC);
	}

	*lenp = strlen(buf);
	return (0);
}

/*
 * Our nodename has been set, pass it along to the RMC.
 */
void
plat_nodename_set(void)
{
	rmc_comm_msg_t	req;	/* request */
	int (*rmc_req_res)(rmc_comm_msg_t *, rmc_comm_msg_t *, time_t) = NULL;

	/*
	 * find the symbol for the mailbox routine
	 */
	rmc_req_res = (int (*)(rmc_comm_msg_t *, rmc_comm_msg_t *, time_t))
	    modgetsymvalue("rmc_comm_request_response", 0);

	if (rmc_req_res == NULL) {
		return;
	}

	/*
	 * construct the message telling the RMC our nodename
	 */
	req.msg_type = DP_SET_CPU_NODENAME;
	req.msg_len = strlen(utsname.nodename) + 1;
	req.msg_bytes = 0;
	req.msg_buf = (caddr_t)utsname.nodename;

	/*
	 * ship it
	 */
	(void) (rmc_req_res)(&req, NULL, 2000);
}

/* most recent system signature recorded by cpu_sgn_update() */
sig_state_t current_sgn;

/*
 * cpu signatures - we're only interested in the overall system
 * "signature" on this platform - not individual cpu signatures
 *
 * Records the new signature in current_sgn and forwards it to the RMC
 * through rmc_comm_request_nowait(), which is located by symbol lookup
 * on each call.  If the routine cannot be found the signature is still
 * recorded but nothing is sent.
 *
 *	sig		signature value
 *	state		new state; SIGST_OFFLINE and SIGST_DETACHED are
 *			ignored entirely
 *	sub_state	new substate; may be rewritten to
 *			SIGSUBST_PANIC_REBOOT (see below)
 *	cpuid		unused
 */
/*ARGSUSED*/
static void
cpu_sgn_update(ushort_t sig, uchar_t state, uchar_t sub_state, int cpuid)
{
	dp_cpu_signature_t signature;
	rmc_comm_msg_t	req;	/* request */
	/* named so that it does not shadow the file-scope rmc_req_now */
	int (*req_nowait)(rmc_comm_msg_t *, uint8_t);
	uint8_t		flags;

	/*
	 * Differentiate a panic reboot from a non-panic reboot in the
	 * setting of the substate of the signature.
	 *
	 * A panic reboot is detected by a current (previous) signature
	 * state of SIGST_EXIT with a substate other than SIGSUBST_REBOOT,
	 * and a new signature of SIGST_EXIT/SIGSUBST_REBOOT.  The domain
	 * signature state SIGST_EXIT is used as the panic flow progresses.
	 *
	 * In that case the new substate becomes SIGSUBST_PANIC_REBOOT, so
	 * that it is known the reboot was involuntary, something that may
	 * be quite useful to know at OBP level.
	 */
	if (state == SIGST_EXIT && sub_state == SIGSUBST_REBOOT &&
	    current_sgn.state_t.state == SIGST_EXIT &&
	    current_sgn.state_t.sub_state != SIGSUBST_REBOOT) {
		sub_state = SIGSUBST_PANIC_REBOOT;
	}

	/*
	 * offline and detached states only apply to a specific cpu
	 * so ignore them.
	 */
	if (state == SIGST_OFFLINE || state == SIGST_DETACHED) {
		return;
	}

	current_sgn.signature = CPU_SIG_BLD(sig, state, sub_state);

	/*
	 * find the symbol for the mailbox routine
	 */
	req_nowait = (int (*)(rmc_comm_msg_t *, uint8_t))
	    modgetsymvalue("rmc_comm_request_nowait", 0);
	if (req_nowait == NULL) {
		return;
	}

	signature.cpu_id = -1;
	signature.sig = sig;
	signature.states = state;
	signature.sub_state = sub_state;
	req.msg_type = DP_SET_CPU_SIGNATURE;
	req.msg_len = (int)(sizeof (signature));
	req.msg_bytes = 0;
	req.msg_buf = (caddr_t)&signature;

	/*
	 * We need to tell the SP that the host is about to stop running.  The
	 * SP will then allow the date to be set at its console, it will change
	 * state of the activity indicator, it will display the correct host
	 * status, and it will stop sending console messages and alerts to the
	 * host communication channel.
	 *
	 * This requires the RMC_COMM_DREQ_URGENT as we want to
	 * be sure activity indicators will reflect the correct status.
	 *
	 * When sub_state SIGSUBST_DUMP is sent, the urgent flag
	 * (RMC_COMM_DREQ_URGENT) is not required as SIGSUBST_PANIC_REBOOT
	 * has already been sent and changed activity indicators.
	 */
	if (state == SIGST_EXIT && (sub_state == SIGSUBST_HALT ||
	    sub_state == SIGSUBST_REBOOT || sub_state == SIGSUBST_ENVIRON ||
	    sub_state == SIGSUBST_PANIC_REBOOT)) {
		flags = RMC_COMM_DREQ_URGENT;
	} else {
		flags = 0;
	}

	(void) (req_nowait)(&req, flags);
}

/*
 * Fiesta support for lgroups.
 *
 * On fiesta platform, an lgroup platform handle == CPU id
 */

/*
 * Macro for extracting the lgroup handle from the CPU id: it keeps the
 * low three bits of the id.
 */
#define	CPUID_TO_LGRP(id)	((id) & 0x7)
/*
 * plat_lgrp_init() sets mem_node_pfn_shift to
 * PLATFORM_MC_SHIFT - MMU_PAGESHIFT.
 */
#define	PLATFORM_MC_SHIFT	36

/*
 * Return the platform handle for the lgroup containing the given CPU.
 * The handle is CPUID_TO_LGRP(id) carried in a pointer-sized value.
 */
void *
plat_lgrp_cpu_to_hand(processorid_t id)
{
	uintptr_t	hand = (uintptr_t)CPUID_TO_LGRP(id);

	return ((void *)hand);
}

/*
 * Platform specific lgroup initialization
 *
 * Walks the children of the PROM root node, counting those whose
 * device type is "cpu" and tracking the largest "portid" found on
 * them.  max_mem_nodes is then set to 1 when at most one CPU was found,
 * or to max_portid + 1 when that portid is within [0, MAX_MEM_NODES);
 * otherwise it is left as it was.  Finally the lgroup tunables and
 * mem_node_pfn_shift are set for this platform.
 */
void
plat_lgrp_init(void)
{
	pnode_t		curnode;
	char		tmp_name[sizeof (OBP_CPU) + 1];  /* extra padding */
	int		portid;
	int		cpucnt = 0;
	int		max_portid = -1;
	extern uint32_t lgrp_expand_proc_thresh;
	extern uint32_t lgrp_expand_proc_diff;
	extern pgcnt_t	lgrp_mem_free_thresh;
	extern uint32_t lgrp_loadavg_tolerance;
	extern uint32_t lgrp_loadavg_max_effect;
	extern uint32_t lgrp_load_thresh;
	extern lgrp_mem_policy_t  lgrp_mem_policy_root;

	/*
	 * Count the number of CPUs installed to determine if
	 * NUMA optimization should be enabled or not.
	 *
	 * All CPU nodes reside in the root node and have a
	 * device type "cpu".
	 */
	curnode = prom_rootnode();
	for (curnode = prom_childnode(curnode); curnode;
	    curnode = prom_nextnode(curnode)) {
		/* Skip any node whose device type is not exactly OBP_CPU. */
		bzero(tmp_name, sizeof (tmp_name));
		if (prom_bounded_getprop(curnode, OBP_DEVICETYPE, tmp_name,
		    sizeof (OBP_CPU)) == -1 || strcmp(tmp_name, OBP_CPU) != 0)
			continue;

		cpucnt++;

		/* portid is only examined when the property was read. */
		if (prom_getprop(curnode, "portid", (caddr_t)&portid) !=
		    -1 && portid > max_portid)
			max_portid = portid;
	}
	if (cpucnt <= 1)
		max_mem_nodes = 1;
	else if (max_portid >= 0 && max_portid < MAX_MEM_NODES)
		max_mem_nodes = max_portid + 1;

	/*
	 * Set tunables for fiesta architecture
	 *
	 * lgrp_expand_proc_thresh is the minimum load on the lgroups
	 * this process is currently running on before considering
	 * expanding threads to another lgroup.
	 *
	 * lgrp_expand_proc_diff determines how much less the remote lgroup
	 * must be loaded before expanding to it.
	 *
	 * Optimize for memory bandwidth by spreading multi-threaded
	 * program to different lgroups.
	 */
	lgrp_expand_proc_thresh = lgrp_loadavg_max_effect - 1;
	lgrp_expand_proc_diff = lgrp_loadavg_max_effect / 2;
	lgrp_loadavg_tolerance = lgrp_loadavg_max_effect / 2;
	lgrp_mem_free_thresh = 1;	/* home lgrp must have some memory */
	lgrp_mem_policy_root = LGRP_MEM_POLICY_NEXT;
	lgrp_load_thresh = 0;

	mem_node_pfn_shift = PLATFORM_MC_SHIFT - MMU_PAGESHIFT;
}

/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system, cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so platform gets to decide its value.  It would be nice if the
 * number was at least proportional to make comparisons more meaningful though.
 * NOTE: The numbers below are supposed to be load latencies for uncached
 * memory divided by 10.
 */
int
plat_lgrp_latency(void *from, void *to)
{
	void	*root = (void *) LGRP_DEFAULT_HANDLE;

	/* Without lgroup optimizations every pair gets the local value. */
	if (!lgrp_optimizations())
		return (12);

	/*
	 * Remote latency applies between two different lgroups, or
	 * whenever the root handle is involved on either side.
	 */
	if (from != to || from == root || to == root)
		return (17);

	return (12);
}

/*
 * Map a page frame number to its memory node by shifting it right by
 * mem_node_pfn_shift.  Asserts that max_mem_nodes is greater than 1.
 */
int
plat_pfn_to_mem_node(pfn_t pfn)
{
	ASSERT(max_mem_nodes > 1);

	return ((int)(pfn >> mem_node_pfn_shift));
}

/*
 * Assign memnode to lgroups
 *
 * Reads the "portid" property of the given node and, when it lies in
 * [0, max_mem_nodes), assigns the lgroup handle with that value to the
 * memory node of the same number.  Nothing is done when the property
 * cannot be read or the value is out of range.
 */
void
plat_fill_mc(pnode_t nodeid)
{
	int	port;

	/*
	 * Memory controller portid == global CPU id
	 */
	if (prom_getprop(nodeid, "portid", (caddr_t)&port) == -1)
		return;

	if (port < 0 || port >= max_mem_nodes)
		return;

	plat_assign_lgrphand_to_mem_node((lgrp_handle_t)port, port);
}

/*
 * Common locking enter code: acquires mi2cv_mutex.  Paired with
 * plat_setprop_exit().
 */
void
plat_setprop_enter(void)
{
	mutex_enter(&mi2cv_mutex);
}

/*
 * Common locking exit code: releases mi2cv_mutex.  Paired with
 * plat_setprop_enter().
 */
void
plat_setprop_exit(void)
{
	mutex_exit(&mi2cv_mutex);
}

/*
 * Called by mi2cv driver
 *
 * Takes the shared lock via plat_setprop_enter(), but only when the
 * calling nexus is the one recorded in shared_mi2cv_dip; for any other
 * nexus this is a no-op.
 */
void
plat_shared_i2c_enter(dev_info_t *i2cnexus_dip)
{
	if (i2cnexus_dip != shared_mi2cv_dip)
		return;

	plat_setprop_enter();
}

/*
 * Called by mi2cv driver
 *
 * Drops the shared lock via plat_setprop_exit(), but only when the
 * calling nexus is the one recorded in shared_mi2cv_dip; for any other
 * nexus this is a no-op.
 */
void
plat_shared_i2c_exit(dev_info_t *i2cnexus_dip)
{
	if (i2cnexus_dip != shared_mi2cv_dip)
		return;

	plat_setprop_exit();
}
/*
 * Called by todm5823 driver
 *
 * Forwards the request through the rmc_req_now handle with a flags
 * value of 0.  The request is silently dropped while the handle is
 * unset, and the result of the call is ignored.
 */
void
plat_rmc_comm_req(struct rmc_comm_msg *request)
{
	if (rmc_req_now == NULL)
		return;

	(void) rmc_req_now(request, 0);
}