/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern void bcopy32_il(uint64_t, uint64_t); extern void flush_cache_il(void); extern void drmach_sleep_il(void); typedef struct { struct drmach_node *node; void *data; } drmach_node_walk_args_t; typedef struct drmach_node { void *here; pnode_t (*get_dnode)(struct drmach_node *node); int (*walk)(struct drmach_node *node, void *data, int (*cb)(drmach_node_walk_args_t *args)); dev_info_t *(*n_getdip)(struct drmach_node *node); int (*n_getproplen)(struct drmach_node *node, char *name, int *len); int (*n_getprop)(struct drmach_node *node, char *name, void *buf, int len); int (*get_parent)(struct 
drmach_node *node, struct drmach_node *pnode); } drmach_node_t; typedef struct { int min_index; int max_index; int arr_sz; drmachid_t *arr; } drmach_array_t; typedef struct { void *isa; void (*dispose)(drmachid_t); sbd_error_t *(*release)(drmachid_t); sbd_error_t *(*status)(drmachid_t, drmach_status_t *); char name[MAXNAMELEN]; } drmach_common_t; typedef struct { uint32_t core_present; uint32_t core_hotadded; uint32_t core_started; } drmach_cmp_t; typedef struct { drmach_common_t cm; int bnum; int assigned; int powered; int connected; int cond; drmach_node_t *tree; drmach_array_t *devices; int boot_board; /* if board exists on bootup */ drmach_cmp_t cores[OPL_MAX_COREID_PER_BOARD]; } drmach_board_t; typedef struct { drmach_common_t cm; drmach_board_t *bp; int unum; int portid; int busy; int powered; const char *type; drmach_node_t *node; } drmach_device_t; typedef struct drmach_cpu { drmach_device_t dev; processorid_t cpuid; int sb; int chipid; int coreid; int strandid; int status; #define OPL_CPU_HOTADDED 1 } drmach_cpu_t; typedef struct drmach_mem { drmach_device_t dev; uint64_t slice_base; uint64_t slice_size; uint64_t base_pa; /* lowest installed memory base */ uint64_t nbytes; /* size of installed memory */ struct memlist *memlist; } drmach_mem_t; typedef struct drmach_io { drmach_device_t dev; int channel; int leaf; } drmach_io_t; typedef struct drmach_domain_info { uint32_t floating; int allow_dr; } drmach_domain_info_t; drmach_domain_info_t drmach_domain; typedef struct { int flags; drmach_device_t *dp; sbd_error_t *err; dev_info_t *dip; } drmach_config_args_t; typedef struct { drmach_board_t *obj; int ndevs; void *a; sbd_error_t *(*found)(void *a, const char *, int, drmachid_t); sbd_error_t *err; } drmach_board_cb_data_t; static drmach_array_t *drmach_boards; static sbd_error_t *drmach_device_new(drmach_node_t *, drmach_board_t *, int, drmachid_t *); static sbd_error_t *drmach_cpu_new(drmach_device_t *, drmachid_t *); static sbd_error_t 
*drmach_mem_new(drmach_device_t *, drmachid_t *); static sbd_error_t *drmach_io_new(drmach_device_t *, drmachid_t *); static dev_info_t *drmach_node_ddi_get_dip(drmach_node_t *np); static int drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len); static int drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len); static int drmach_get_portid(drmach_node_t *); static sbd_error_t *drmach_i_status(drmachid_t, drmach_status_t *); static int opl_check_dr_status(); static void drmach_io_dispose(drmachid_t); static sbd_error_t *drmach_io_release(drmachid_t); static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *); static int drmach_init(void); static void drmach_fini(void); static void drmach_swap_pa(drmach_mem_t *, drmach_mem_t *); static drmach_board_t *drmach_get_board_by_bnum(int); /* options for the second argument in drmach_add_remove_cpu() */ #define HOTADD_CPU 1 #define HOTREMOVE_CPU 2 #define ON_BOARD_CORE_NUM(x) (((uint_t)(x) / OPL_MAX_STRANDID_PER_CORE) & \ (OPL_MAX_COREID_PER_BOARD - 1)) extern struct cpu *SIGBCPU; static int drmach_name2type_idx(char *); static drmach_board_t *drmach_board_new(int, int); #ifdef DEBUG #define DRMACH_PR if (drmach_debug) printf int drmach_debug = 1; /* set to non-zero to enable debug messages */ #else #define DRMACH_PR _NOTE(CONSTANTCONDITION) if (0) printf #endif /* DEBUG */ #define DRMACH_OBJ(id) ((drmach_common_t *)id) #define DRMACH_NULL_ID(id) ((id) == 0) #define DRMACH_IS_BOARD_ID(id) \ ((id != 0) && \ (DRMACH_OBJ(id)->isa == (void *)drmach_board_new)) #define DRMACH_IS_CPU_ID(id) \ ((id != 0) && \ (DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new)) #define DRMACH_IS_MEM_ID(id) \ ((id != 0) && \ (DRMACH_OBJ(id)->isa == (void *)drmach_mem_new)) #define DRMACH_IS_IO_ID(id) \ ((id != 0) && \ (DRMACH_OBJ(id)->isa == (void *)drmach_io_new)) #define DRMACH_IS_DEVICE_ID(id) \ ((id != 0) && \ (DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \ DRMACH_OBJ(id)->isa == (void 
*)drmach_mem_new || \ DRMACH_OBJ(id)->isa == (void *)drmach_io_new)) #define DRMACH_IS_ID(id) \ ((id != 0) && \ (DRMACH_OBJ(id)->isa == (void *)drmach_board_new || \ DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \ DRMACH_OBJ(id)->isa == (void *)drmach_mem_new || \ DRMACH_OBJ(id)->isa == (void *)drmach_io_new)) #define DRMACH_INTERNAL_ERROR() \ drerr_new(1, EOPL_INTERNAL, drmach_ie_fmt, __LINE__) static char *drmach_ie_fmt = "drmach.c %d"; static struct { const char *name; const char *type; sbd_error_t *(*new)(drmach_device_t *, drmachid_t *); } drmach_name2type[] = { { "cpu", DRMACH_DEVTYPE_CPU, drmach_cpu_new }, { "pseudo-mc", DRMACH_DEVTYPE_MEM, drmach_mem_new }, { "pci", DRMACH_DEVTYPE_PCI, drmach_io_new }, }; /* utility */ #define MBYTE (1048576ull) /* * drmach autoconfiguration data structures and interfaces */ extern struct mod_ops mod_miscops; static struct modlmisc modlmisc = { &mod_miscops, "OPL DR 1.1" }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modlmisc, NULL }; static krwlock_t drmach_boards_rwlock; typedef const char *fn_t; int _init(void) { int err; if ((err = drmach_init()) != 0) { return (err); } if ((err = mod_install(&modlinkage)) != 0) { drmach_fini(); } return (err); } int _fini(void) { int err; if ((err = mod_remove(&modlinkage)) == 0) drmach_fini(); return (err); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } struct drmach_mc_lookup { int bnum; drmach_board_t *bp; dev_info_t *dip; /* rv - set if found */ }; #define _ptob64(p) ((uint64_t)(p) << PAGESHIFT) #define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT)) static int drmach_setup_mc_info(dev_info_t *dip, drmach_mem_t *mp) { uint64_t memory_ranges[128]; int len; struct memlist *ml; int rv; hwd_sb_t *hwd; hwd_memory_t *pm; len = sizeof (memory_ranges); if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "sb-mem-ranges", (caddr_t)&memory_ranges[0], &len) != DDI_PROP_SUCCESS) { mp->slice_base = 0; mp->slice_size = 0; return (-1); } 
mp->slice_base = memory_ranges[0]; mp->slice_size = memory_ranges[1]; if (!mp->dev.bp->boot_board) { int i; rv = opl_read_hwd(mp->dev.bp->bnum, NULL, NULL, NULL, &hwd); if (rv != 0) { return (-1); } ml = NULL; pm = &hwd->sb_cmu.cmu_memory; for (i = 0; i < HWD_MAX_MEM_CHUNKS; i++) { if (pm->mem_chunks[i].chnk_size > 0) { ml = memlist_add_span(ml, pm->mem_chunks[i].chnk_start_address, pm->mem_chunks[i].chnk_size); } } } else { /* * we intersect phys_install to get base_pa. * This only works at bootup time. */ memlist_read_lock(); ml = memlist_dup(phys_install); memlist_read_unlock(); ml = memlist_del_span(ml, 0ull, mp->slice_base); if (ml) { uint64_t basepa, endpa; endpa = _ptob64(physmax + 1); basepa = mp->slice_base + mp->slice_size; ml = memlist_del_span(ml, basepa, endpa - basepa); } } if (ml) { uint64_t nbytes = 0; struct memlist *p; for (p = ml; p; p = p->next) { nbytes += p->size; } if ((mp->nbytes = nbytes) > 0) mp->base_pa = ml->address; else mp->base_pa = 0; mp->memlist = ml; } else { mp->base_pa = 0; mp->nbytes = 0; } return (0); } struct drmach_hotcpu { drmach_board_t *bp; int bnum; int core_id; int rv; int option; }; static int drmach_cpu_cb(dev_info_t *dip, void *arg) { struct drmach_hotcpu *p = (struct drmach_hotcpu *)arg; char name[OBP_MAXDRVNAME]; int len = OBP_MAXDRVNAME; int bnum, core_id, strand_id; drmach_board_t *bp; if (dip == ddi_root_node()) { return (DDI_WALK_CONTINUE); } if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "name", (caddr_t)name, &len) != DDI_PROP_SUCCESS) { return (DDI_WALK_PRUNECHILD); } /* only cmp has board number */ bnum = -1; len = sizeof (bnum); if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_BOARDNUM, (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) { bnum = -1; } if (strcmp(name, "cmp") == 0) { if (bnum != p->bnum) return (DDI_WALK_PRUNECHILD); return (DDI_WALK_CONTINUE); } /* we have already pruned all unwanted cores and cpu's above */ if (strcmp(name, "core") == 0) { return 
(DDI_WALK_CONTINUE); } if (strcmp(name, "cpu") == 0) { processorid_t cpuid; len = sizeof (cpuid); if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "cpuid", (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) { p->rv = -1; return (DDI_WALK_TERMINATE); } core_id = p->core_id; bnum = LSB_ID(cpuid); if (ON_BOARD_CORE_NUM(cpuid) != core_id) return (DDI_WALK_CONTINUE); bp = p->bp; ASSERT(bnum == bp->bnum); if (p->option == HOTADD_CPU) { if (prom_hotaddcpu(cpuid) != 0) { p->rv = -1; return (DDI_WALK_TERMINATE); } strand_id = STRAND_ID(cpuid); bp->cores[core_id].core_hotadded |= (1 << strand_id); } else if (p->option == HOTREMOVE_CPU) { if (prom_hotremovecpu(cpuid) != 0) { p->rv = -1; return (DDI_WALK_TERMINATE); } strand_id = STRAND_ID(cpuid); bp->cores[core_id].core_hotadded &= ~(1 << strand_id); } return (DDI_WALK_CONTINUE); } return (DDI_WALK_PRUNECHILD); } static int drmach_add_remove_cpu(int bnum, int core_id, int option) { struct drmach_hotcpu arg; drmach_board_t *bp; bp = drmach_get_board_by_bnum(bnum); ASSERT(bp); arg.bp = bp; arg.bnum = bnum; arg.core_id = core_id; arg.rv = 0; arg.option = option; ddi_walk_devs(ddi_root_node(), drmach_cpu_cb, (void *)&arg); return (arg.rv); } struct drmach_setup_core_arg { drmach_board_t *bp; }; static int drmach_setup_core_cb(dev_info_t *dip, void *arg) { struct drmach_setup_core_arg *p = (struct drmach_setup_core_arg *)arg; char name[OBP_MAXDRVNAME]; int len = OBP_MAXDRVNAME; int bnum; int core_id, strand_id; if (dip == ddi_root_node()) { return (DDI_WALK_CONTINUE); } if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "name", (caddr_t)name, &len) != DDI_PROP_SUCCESS) { return (DDI_WALK_PRUNECHILD); } /* only cmp has board number */ bnum = -1; len = sizeof (bnum); if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_BOARDNUM, (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) { bnum = -1; } if (strcmp(name, "cmp") == 0) { if (bnum != p->bp->bnum) return (DDI_WALK_PRUNECHILD); return (DDI_WALK_CONTINUE); } 
/* we have already pruned all unwanted cores and cpu's above */ if (strcmp(name, "core") == 0) { return (DDI_WALK_CONTINUE); } if (strcmp(name, "cpu") == 0) { processorid_t cpuid; len = sizeof (cpuid); if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "cpuid", (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) { return (DDI_WALK_TERMINATE); } bnum = LSB_ID(cpuid); ASSERT(bnum == p->bp->bnum); core_id = ON_BOARD_CORE_NUM(cpuid); strand_id = STRAND_ID(cpuid); p->bp->cores[core_id].core_present |= (1 << strand_id); return (DDI_WALK_CONTINUE); } return (DDI_WALK_PRUNECHILD); } static void drmach_setup_core_info(drmach_board_t *obj) { struct drmach_setup_core_arg arg; int i; for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) { obj->cores[i].core_present = 0; obj->cores[i].core_hotadded = 0; obj->cores[i].core_started = 0; } arg.bp = obj; ddi_walk_devs(ddi_root_node(), drmach_setup_core_cb, (void *)&arg); for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) { if (obj->boot_board) { obj->cores[i].core_hotadded = obj->cores[i].core_started = obj->cores[i].core_present; } } } /* * drmach_node_* routines serve the purpose of separating the * rest of the code from the device tree and OBP. This is necessary * because of In-Kernel-Probing. Devices probed after stod, are probed * by the in-kernel-prober, not OBP. These devices, therefore, do not * have dnode ids. */ typedef struct { drmach_node_walk_args_t *nwargs; int (*cb)(drmach_node_walk_args_t *args); int err; } drmach_node_ddi_walk_args_t; static int drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg) { drmach_node_ddi_walk_args_t *nargs; nargs = (drmach_node_ddi_walk_args_t *)arg; /* * dip doesn't have to be held here as we are called * from ddi_walk_devs() which holds the dip. 
*/ nargs->nwargs->node->here = (void *)dip; nargs->err = nargs->cb(nargs->nwargs); /* * Set "here" to NULL so that unheld dip is not accessible * outside ddi_walk_devs() */ nargs->nwargs->node->here = NULL; if (nargs->err) return (DDI_WALK_TERMINATE); else return (DDI_WALK_CONTINUE); } static int drmach_node_ddi_walk(drmach_node_t *np, void *data, int (*cb)(drmach_node_walk_args_t *args)) { drmach_node_walk_args_t args; drmach_node_ddi_walk_args_t nargs; /* initialized args structure for callback */ args.node = np; args.data = data; nargs.nwargs = &args; nargs.cb = cb; nargs.err = 0; /* * Root node doesn't have to be held in any way. */ ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb, (void *)&nargs); return (nargs.err); } static int drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp) { dev_info_t *ndip; static char *fn = "drmach_node_ddi_get_parent"; ndip = np->n_getdip(np); if (ndip == NULL) { cmn_err(CE_WARN, "%s: NULL dip", fn); return (-1); } bcopy(np, pp, sizeof (drmach_node_t)); pp->here = (void *)ddi_get_parent(ndip); if (pp->here == NULL) { cmn_err(CE_WARN, "%s: NULL parent dip", fn); return (-1); } return (0); } /*ARGSUSED*/ static pnode_t drmach_node_ddi_get_dnode(drmach_node_t *np) { return ((pnode_t)NULL); } static drmach_node_t * drmach_node_new(void) { drmach_node_t *np; np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP); np->get_dnode = drmach_node_ddi_get_dnode; np->walk = drmach_node_ddi_walk; np->n_getdip = drmach_node_ddi_get_dip; np->n_getproplen = drmach_node_ddi_get_proplen; np->n_getprop = drmach_node_ddi_get_prop; np->get_parent = drmach_node_ddi_get_parent; return (np); } static void drmach_node_dispose(drmach_node_t *np) { kmem_free(np, sizeof (*np)); } static dev_info_t * drmach_node_ddi_get_dip(drmach_node_t *np) { return ((dev_info_t *)np->here); } static int drmach_node_walk(drmach_node_t *np, void *param, int (*cb)(drmach_node_walk_args_t *args)) { return (np->walk(np, param, cb)); } static int 
drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len) { int rv = 0; dev_info_t *ndip; static char *fn = "drmach_node_ddi_get_prop"; ndip = np->n_getdip(np); if (ndip == NULL) { cmn_err(CE_WARN, "%s: NULL dip", fn); rv = -1; } else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS, name, (caddr_t)buf, &len) != DDI_PROP_SUCCESS) { rv = -1; } return (rv); } static int drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len) { int rv = 0; dev_info_t *ndip; ndip = np->n_getdip(np); if (ndip == NULL) { rv = -1; } else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS, name, len) != DDI_PROP_SUCCESS) { rv = -1; } return (rv); } static drmachid_t drmach_node_dup(drmach_node_t *np) { drmach_node_t *dup; dup = drmach_node_new(); dup->here = np->here; dup->get_dnode = np->get_dnode; dup->walk = np->walk; dup->n_getdip = np->n_getdip; dup->n_getproplen = np->n_getproplen; dup->n_getprop = np->n_getprop; dup->get_parent = np->get_parent; return (dup); } /* * drmach_array provides convenient array construction, access, * bounds checking and array destruction logic. 
*/
/*
 * Allocate a new drmach_array_t that can hold one drmachid_t per
 * index in the inclusive range [min_index, max_index].  Returns
 * NULL (0) when the requested range is empty or inverted.
 */
static drmach_array_t *
drmach_array_new(int min_index, int max_index)
{
	drmach_array_t *arr;

	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);

	/* arr_sz is in bytes: one pointer-sized slot per index */
	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
	if (arr->arr_sz > 0) {
		arr->min_index = min_index;
		arr->max_index = max_index;

		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
		return (arr);
	} else {
		/* empty or inverted range: free container and fail */
		kmem_free(arr, sizeof (*arr));
		return (0);
	}
}

/*
 * Store val at index idx.  Returns -1 when idx lies outside the
 * array's range, 0 on success.
 */
static int
drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
{
	if (idx < arr->min_index || idx > arr->max_index)
		return (-1);
	else {
		arr->arr[idx - arr->min_index] = val;
		return (0);
	}
	/*NOTREACHED*/
}

/*
 * Fetch the id stored at index idx into *val.  Returns -1 when idx
 * lies outside the array's range, 0 on success (*val may be NULL
 * for an unoccupied slot).
 */
static int
drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
{
	if (idx < arr->min_index || idx > arr->max_index)
		return (-1);
	else {
		*val = arr->arr[idx - arr->min_index];
		return (0);
	}
	/*NOTREACHED*/
}

/*
 * Position *idx on the first occupied slot, returning its id in
 * *val.  Returns non-zero when no occupied slot exists.
 */
static int
drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
{
	int rv;

	*idx = arr->min_index;
	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
		*idx += 1;

	return (rv);
}

/*
 * Advance *idx to the next occupied slot, returning its id in
 * *val.  Returns non-zero when iteration runs past the end.
 */
static int
drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
{
	int rv;

	*idx += 1;
	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
		*idx += 1;

	return (rv);
}

/*
 * Apply disposer to every occupied slot, then free the slot vector
 * and the array container itself.
 */
static void
drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
{
	drmachid_t	val;
	int		idx;
	int		rv;

	rv = drmach_array_first(arr, &idx, &val);
	while (rv == 0) {
		(*disposer)(val);
		rv = drmach_array_next(arr, &idx, &val);
	}

	kmem_free(arr->arr, arr->arr_sz);
	kmem_free(arr, sizeof (*arr));
}

/*
 * Look up the board object registered for board number bnum in the
 * global drmach_boards array; NULL when no such board is known.
 */
static drmach_board_t *
drmach_get_board_by_bnum(int bnum)
{
	drmachid_t id;

	if (drmach_array_get(drmach_boards, bnum, &id) == 0)
		return ((drmach_board_t *)id);
	else
		return (NULL);
}

/* Indirect through the node's get_dnode method. */
static pnode_t
drmach_node_get_dnode(drmach_node_t *np)
{
	return (np->get_dnode(np));
}

/*ARGSUSED*/
sbd_error_t *
drmach_configure(drmachid_t id, int flags)
{
	drmach_device_t	*dp;
	sbd_error_t	*err = NULL;
	dev_info_t	*rdip;
	dev_info_t	*fdip = NULL;

	if
(DRMACH_IS_CPU_ID(id)) { return (NULL); } if (!DRMACH_IS_DEVICE_ID(id)) return (drerr_new(0, EOPL_INAPPROP, NULL)); dp = id; rdip = dp->node->n_getdip(dp->node); ASSERT(rdip); ASSERT(e_ddi_branch_held(rdip)); if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) { char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); dev_info_t *dip = (fdip != NULL) ? fdip : rdip; (void) ddi_pathname(dip, path); err = drerr_new(1, EOPL_DRVFAIL, path); kmem_free(path, MAXPATHLEN); /* If non-NULL, fdip is returned held and must be released */ if (fdip != NULL) ddi_release_devi(fdip); } return (err); } static sbd_error_t * drmach_device_new(drmach_node_t *node, drmach_board_t *bp, int portid, drmachid_t *idp) { int i; int rv; drmach_device_t proto; sbd_error_t *err; char name[OBP_MAXDRVNAME]; rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME); if (rv) { /* every node is expected to have a name */ err = drerr_new(1, EOPL_GETPROP, "device node %s: property %s", ddi_node_name(node->n_getdip(node)), "name"); return (err); } /* * The node currently being examined is not listed in the name2type[] * array. In this case, the node is no interest to drmach. Both * dp and err are initialized here to yield nothing (no device or * error structure) for this case. 
*/ i = drmach_name2type_idx(name); if (i < 0) { *idp = (drmachid_t)0; return (NULL); } /* device specific new function will set unum */ bzero(&proto, sizeof (proto)); proto.type = drmach_name2type[i].type; proto.bp = bp; proto.node = node; proto.portid = portid; return (drmach_name2type[i].new(&proto, idp)); } static void drmach_device_dispose(drmachid_t id) { drmach_device_t *self = id; self->cm.dispose(id); } static drmach_board_t * drmach_board_new(int bnum, int boot_board) { static sbd_error_t *drmach_board_release(drmachid_t); static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *); drmach_board_t *bp; bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP); bp->cm.isa = (void *)drmach_board_new; bp->cm.release = drmach_board_release; bp->cm.status = drmach_board_status; (void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name)); bp->bnum = bnum; bp->devices = NULL; bp->connected = boot_board; bp->tree = drmach_node_new(); bp->assigned = boot_board; bp->powered = boot_board; bp->boot_board = boot_board; /* * If this is not bootup initialization, we have to wait till * IKP sets up the device nodes in drmach_board_connect(). 
if (boot_board)
		drmach_setup_core_info(bp);

	/* register the new board in the global board array */
	(void) drmach_array_set(drmach_boards, bnum, bp);

	return (bp);
}

/*
 * Free a board object along with its cached device-tree handle and
 * its device array (each device is destroyed through its own
 * dispose method).
 */
static void
drmach_board_dispose(drmachid_t id)
{
	drmach_board_t *bp;

	ASSERT(DRMACH_IS_BOARD_ID(id));
	bp = id;

	if (bp->tree)
		drmach_node_dispose(bp->tree);

	if (bp->devices)
		drmach_array_dispose(bp->devices, drmach_device_dispose);

	kmem_free(bp, sizeof (*bp));
}

/*
 * Fill in *stat for the given board id.  The board-level busy and
 * configured bits are the logical OR of the corresponding bits of
 * every device currently attached to the board.
 */
static sbd_error_t *
drmach_board_status(drmachid_t id, drmach_status_t *stat)
{
	sbd_error_t	*err = NULL;
	drmach_board_t	*bp;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	bp = id;

	stat->assigned = bp->assigned;
	stat->powered = bp->powered;
	stat->busy = 0;			/* assume not busy */
	stat->configured = 0;		/* assume not configured */
	stat->empty = 0;
	stat->cond = bp->cond = SBD_COND_OK;
	(void) strncpy(stat->type, "System Brd", sizeof (stat->type));
	stat->info[0] = '\0';

	if (bp->devices) {
		int		rv;
		int		d_idx;
		drmachid_t	d_id;

		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
		while (rv == 0) {
			drmach_status_t	d_stat;

			err = drmach_i_status(d_id, &d_stat);
			if (err)
				break;

			/* fold per-device state into the board state */
			stat->busy |= d_stat.busy;
			stat->configured |= d_stat.configured;

			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
		}
	}

	return (err);
}

/*
 * Return 1 when the board is listed in the domain's floating-board
 * mask, 0 otherwise (including when id is not a board id).
 */
int
drmach_board_is_floating(drmachid_t id)
{
	drmach_board_t *bp;

	if (!DRMACH_IS_BOARD_ID(id))
		return (0);

	bp = (drmach_board_t *)id;

	return ((drmach_domain.floating & (1 << bp->bnum)) ?
1 : 0); } static int drmach_init(void) { dev_info_t *rdip; int i, rv, len; int *floating; rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL); drmach_boards = drmach_array_new(0, MAX_BOARDS - 1); rdip = ddi_root_node(); if (ddi_getproplen(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS, "floating-boards", &len) != DDI_PROP_SUCCESS) { cmn_err(CE_WARN, "Cannot get floating-boards proplen\n"); } else { floating = (int *)kmem_alloc(len, KM_SLEEP); rv = ddi_prop_op(DDI_DEV_T_ANY, rdip, PROP_LEN_AND_VAL_BUF, DDI_PROP_DONTPASS, "floating-boards", (caddr_t)floating, &len); if (rv != DDI_PROP_SUCCESS) { cmn_err(CE_WARN, "Cannot get floating-boards prop\n"); } else { drmach_domain.floating = 0; for (i = 0; i < len / sizeof (int); i++) { drmach_domain.floating |= (1 << floating[i]); } } kmem_free(floating, len); } drmach_domain.allow_dr = opl_check_dr_status(); rdip = ddi_get_child(ddi_root_node()); do { int bnum; drmachid_t id; bnum = -1; bnum = ddi_getprop(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS, OBP_BOARDNUM, -1); if (bnum == -1) continue; if (drmach_array_get(drmach_boards, bnum, &id) == -1) { cmn_err(CE_WARN, "Device node 0x%p has invalid " "property value, %s=%d", (void *)rdip, OBP_BOARDNUM, bnum); goto error; } else if (id == NULL) { (void) drmach_board_new(bnum, 1); } } while ((rdip = ddi_get_next_sibling(rdip)) != NULL); opl_hold_devtree(); /* * Initialize the IKP feature. * * This can be done only after DR has acquired a hold on all the * device nodes that are interesting to IKP. 
*/ if (opl_init_cfg() != 0) { cmn_err(CE_WARN, "DR - IKP initialization failed"); opl_release_devtree(); goto error; } return (0); error: drmach_array_dispose(drmach_boards, drmach_board_dispose); rw_destroy(&drmach_boards_rwlock); return (ENXIO); } static void drmach_fini(void) { rw_enter(&drmach_boards_rwlock, RW_WRITER); drmach_array_dispose(drmach_boards, drmach_board_dispose); drmach_boards = NULL; rw_exit(&drmach_boards_rwlock); /* * Walk immediate children of the root devinfo node * releasing holds acquired on branches in drmach_init() */ opl_release_devtree(); rw_destroy(&drmach_boards_rwlock); } /* * Each system board contains 2 Oberon PCI bridge and * 1 CMUCH. * Each oberon has 2 channels. * Each channel has 2 pci-ex leaf. * Each CMUCH has 1 pci bus. * * * Device Path: * /pci@,reg * * where * portid[10] = 0 * portid[9:0] = LLEAF_ID[9:0] of the Oberon Channel * * LLEAF_ID[9:8] = 0 * LLEAF_ID[8:4] = LSB_ID[4:0] * LLEAF_ID[3:1] = IO Channel#[2:0] (0,1,2,3 for Oberon) * channel 4 is pcicmu * LLEAF_ID[0] = PCI Leaf Number (0 for leaf-A, 1 for leaf-B) * * Properties: * name = pci * device_type = "pciex" * board# = LSBID * reg = int32 * 2, Oberon CSR space of the leaf and the UBC space * portid = Jupiter Bus Device ID ((LSB_ID << 3)|pciport#) */ static sbd_error_t * drmach_io_new(drmach_device_t *proto, drmachid_t *idp) { drmach_io_t *ip; int portid; portid = proto->portid; ASSERT(portid != -1); proto->unum = portid & (MAX_IO_UNITS_PER_BOARD - 1); ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP); bcopy(proto, &ip->dev, sizeof (ip->dev)); ip->dev.node = drmach_node_dup(proto->node); ip->dev.cm.isa = (void *)drmach_io_new; ip->dev.cm.dispose = drmach_io_dispose; ip->dev.cm.release = drmach_io_release; ip->dev.cm.status = drmach_io_status; ip->channel = (portid >> 1) & 0x7; ip->leaf = (portid & 0x1); (void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d", ip->dev.type, ip->dev.unum); *idp = (drmachid_t)ip; return (NULL); } static void 
drmach_io_dispose(drmachid_t id)
{
	drmach_io_t *self;

	ASSERT(DRMACH_IS_IO_ID(id));

	self = id;
	if (self->dev.node)
		drmach_node_dispose(self->dev.node);

	kmem_free(self, sizeof (*self));
}

/*
 * Validate a DR command before execution.  Status/ncm queries
 * always succeed; every other command requires the "opldr" platform
 * option string and, when id is a board, is cross-checked against
 * the board's connect state and the domain's allow_dr setting.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
{
	drmach_board_t	*bp = (drmach_board_t *)id;
	sbd_error_t	*err = NULL;

	/* allow status and ncm operations to always succeed */
	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
		return (NULL);
	}

	/* check all other commands for the required option string */
	if ((opts->size > 0) && (opts->copts != NULL)) {

		DRMACH_PR("platform options: %s\n", opts->copts);

		if (strstr(opts->copts, "opldr") == NULL) {
			err = drerr_new(1, EOPL_SUPPORT, NULL);
		}
	} else {
		err = drerr_new(1, EOPL_SUPPORT, NULL);
	}

	if (!err && id && DRMACH_IS_BOARD_ID(id)) {
		switch (cmd) {
		case SBD_CMD_TEST:
		case SBD_CMD_STATUS:
		case SBD_CMD_GETNCM:
			break;
		case SBD_CMD_CONNECT:
			/* connecting an already-connected board is an error */
			if (bp->connected)
				err = drerr_new(0, ESBD_STATE, NULL);
			else if (!drmach_domain.allow_dr)
				err = drerr_new(1, EOPL_SUPPORT, NULL);
			break;
		case SBD_CMD_DISCONNECT:
			if (!bp->connected)
				err = drerr_new(0, ESBD_STATE, NULL);
			else if (!drmach_domain.allow_dr)
				err = drerr_new(1, EOPL_SUPPORT, NULL);
			break;
		default:
			if (!drmach_domain.allow_dr)
				err = drerr_new(1, EOPL_SUPPORT, NULL);
			break;
		}
	}

	return (err);
}

/* No platform post-processing is needed after a DR command. */
/*ARGSUSED*/
sbd_error_t *
drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
{
	return (NULL);
}

/*
 * Assign board bnum to the domain, creating the board object on
 * first use.  Fails for an out-of-range board number.
 */
sbd_error_t *
drmach_board_assign(int bnum, drmachid_t *id)
{
	sbd_error_t	*err = NULL;

	rw_enter(&drmach_boards_rwlock, RW_WRITER);

	if (drmach_array_get(drmach_boards, bnum, id) == -1) {
		err = drerr_new(1, EOPL_BNUM, "%d", bnum);
	} else {
		drmach_board_t	*bp;

		/*
		 * Board already exists: drop to a read lock; the
		 * write lock is needed only to insert a new board.
		 */
		if (*id)
			rw_downgrade(&drmach_boards_rwlock);

		bp = *id;
		if (!(*id))
			bp = *id =
			    (drmachid_t)drmach_board_new(bnum, 0);
		bp->assigned = 1;
	}

	rw_exit(&drmach_boards_rwlock);

	return (err);
}

/*ARGSUSED*/
sbd_error_t *
drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
{
	extern int	cpu_alljupiter;
drmach_board_t *obj = (drmach_board_t *)id; unsigned cpu_impl; if (!DRMACH_IS_BOARD_ID(id)) return (drerr_new(0, EOPL_INAPPROP, NULL)); if (opl_probe_sb(obj->bnum, &cpu_impl) != 0) return (drerr_new(1, EOPL_PROBE, NULL)); if (cpu_alljupiter) { if (cpu_impl & (1 << OLYMPUS_C_IMPL)) { (void) opl_unprobe_sb(obj->bnum); return (drerr_new(1, EOPL_MIXED_CPU, NULL)); } } (void) prom_attach_notice(obj->bnum); drmach_setup_core_info(obj); obj->connected = 1; return (NULL); } static int drmach_cache_flush_flag[NCPU]; /*ARGSUSED*/ static void drmach_flush_cache(uint64_t id, uint64_t dummy) { extern void cpu_flush_ecache(void); cpu_flush_ecache(); drmach_cache_flush_flag[id] = 0; } static void drmach_flush_all() { cpuset_t xc_cpuset; int i; xc_cpuset = cpu_ready_set; for (i = 0; i < NCPU; i++) { if (CPU_IN_SET(xc_cpuset, i)) { drmach_cache_flush_flag[i] = 1; xc_one(i, drmach_flush_cache, i, 0); while (drmach_cache_flush_flag[i]) { DELAY(1000); } } } } static int drmach_disconnect_cpus(drmach_board_t *bp) { int i, bnum; bnum = bp->bnum; for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) { if (bp->cores[i].core_present) { if (bp->cores[i].core_started) return (-1); if (bp->cores[i].core_hotadded) { if (drmach_add_remove_cpu(bnum, i, HOTREMOVE_CPU)) { cmn_err(CE_WARN, "Failed to remove " "CMP %d on board %d\n", i, bnum); return (-1); } } } } return (0); } /*ARGSUSED*/ sbd_error_t * drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts) { drmach_board_t *obj; int rv = 0; sbd_error_t *err = NULL; if (DRMACH_NULL_ID(id)) return (NULL); if (!DRMACH_IS_BOARD_ID(id)) return (drerr_new(0, EOPL_INAPPROP, NULL)); obj = (drmach_board_t *)id; if (drmach_disconnect_cpus(obj)) { err = drerr_new(1, EOPL_DEPROBE, obj->cm.name); return (err); } rv = opl_unprobe_sb(obj->bnum); if (rv == 0) { (void) prom_detach_notice(obj->bnum); obj->connected = 0; } else err = drerr_new(1, EOPL_DEPROBE, obj->cm.name); return (err); } static int drmach_get_portid(drmach_node_t *np) { int portid; char 
type[OBP_MAXPROPNAME];

	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
		return (portid);

	/*
	 * Get the device_type property to see if we should
	 * continue processing this node.
	 */
	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
		return (-1);

	if (strcmp(type, OPL_CPU_NODE) == 0) {
		/*
		 * We return cpuid because it has no portid
		 */
		if (np->n_getprop(np, "cpuid", &portid, sizeof (portid)) == 0)
			return (portid);
	}

	return (-1);
}

/*
 * This is a helper function to determine if a given
 * node should be considered for a dr operation according
 * to predefined dr type nodes and the node's name.
 * Formal Parameter : The name of a device node.
 * Return Value: -1, name does not map to a valid dr type.
 * A value greater or equal to 0, name is a valid dr type.
 */
static int
drmach_name2type_idx(char *name)
{
	int	index, ntypes;

	if (name == NULL)
		return (-1);

	/*
	 * Determine how many possible types are currently supported
	 * for dr.
	 */
	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);

	/* Determine if the node's name correspond to a predefined type. */
	for (index = 0; index < ntypes; index++) {
		if (strcmp(drmach_name2type[index].name, name) == 0)
			/* The node is an allowed type for dr. */
			return (index);
	}

	/*
	 * If the name of the node does not map to any of the
	 * types in the array drmach_name2type then the node is not of
	 * interest to dr.
	 */
	return (-1);
}

/*
 * there is some complication on OPL:
 * - pseudo-mc nodes do not have portid property
 * - portid[9:5] of cmp node is LSB #, portid[7:3] of pci is LSB#
 * - cmp has board#
 * - core and cpu nodes do not have portid and board# properties
 * starcat uses portid to derive the board# but that does not work
 * for us.  starfire reads board# property to filter the devices.
 * That does not work either.
So for these specific device,
 * we use specific hard coded methods to get the board# -
 *	cpu:	LSB# = CPUID[9:5]
 */

/*
 * Tree-walk callback: decide whether the node belongs to the board
 * being probed and, if so, create a drmach device for it and hand it
 * to the caller-supplied 'found' function.
 * Returns 0 to continue the walk, -1 to abort (data->err holds why).
 */
static int
drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
{
	drmach_node_t		*node = args->node;
	drmach_board_cb_data_t	*data = args->data;
	drmach_board_t		*obj = data->obj;

	int		rv, portid;
	int		bnum;
	drmachid_t	id;
	drmach_device_t	*device;
	char		name[OBP_MAXDRVNAME];

	portid = drmach_get_portid(node);
	/*
	 * core, cpu and pseudo-mc do not have portid
	 * we use cpuid as the portid of the cpu node
	 * for pseudo-mc, we do not use portid info.
	 */

	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
	if (rv)
		return (0);

	rv = node->n_getprop(node, OBP_BOARDNUM, &bnum, sizeof (bnum));

	if (rv) {
		/*
		 * cpu does not have board# property.  We use
		 * CPUID[9:5]
		 */
		if (strcmp("cpu", name) == 0) {
			bnum = (portid >> 5) & 0x1f;
		} else
			return (0);
	}

	/* Not on the board we are scanning - skip. */
	if (bnum != obj->bnum)
		return (0);

	if (drmach_name2type_idx(name) < 0) {
		return (0);
	}

	/*
	 * Create a device data structure from this node data.
	 * The call may yield nothing if the node is not of interest
	 * to drmach.
	 */
	data->err = drmach_device_new(node, obj, portid, &id);
	if (data->err)
		return (-1);
	else if (!id) {
		/*
		 * drmach_device_new examined the node we passed in
		 * and determined that it was one not of interest to
		 * drmach.  So, it is skipped.
		 */
		return (0);
	}

	rv = drmach_array_set(obj->devices, data->ndevs++, id);
	if (rv) {
		data->err = DRMACH_INTERNAL_ERROR();
		return (-1);
	}
	device = id;

	data->err = (*data->found)(data->a, device->type, device->unum, id);
	return (data->err == NULL ? 0 : -1);
}

/*
 * Walk the board's device tree and report every dr-capable device
 * through the 'found' callback.  On failure the board's device array
 * is disposed and an sbd error is returned.
 */
sbd_error_t *
drmach_board_find_devices(drmachid_t id, void *a,
	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
{
	drmach_board_t		*bp = (drmach_board_t *)id;
	sbd_error_t		*err;
	int			max_devices;
	int			rv;
	drmach_board_cb_data_t	data;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	max_devices = MAX_CPU_UNITS_PER_BOARD;
	max_devices += MAX_MEM_UNITS_PER_BOARD;
	max_devices += MAX_IO_UNITS_PER_BOARD;

	bp->devices = drmach_array_new(0, max_devices);

	if (bp->tree == NULL)
		bp->tree = drmach_node_new();

	data.obj = bp;
	data.ndevs = 0;
	data.found = found;
	data.a = a;
	data.err = NULL;

	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
	if (rv == 0)
		err = NULL;
	else {
		drmach_array_dispose(bp->devices, drmach_device_dispose);
		bp->devices = NULL;

		if (data.err)
			err = data.err;
		else
			err = DRMACH_INTERNAL_ERROR();
	}

	return (err);
}

/*
 * Look board number 'bnum' up in the global board array.
 * Returns 0 and sets *id on success; -1 (and *id = 0) if absent.
 */
int
drmach_board_lookup(int bnum, drmachid_t *id)
{
	int	rv = 0;

	rw_enter(&drmach_boards_rwlock, RW_READER);
	if (drmach_array_get(drmach_boards, bnum, id)) {
		*id = 0;
		rv = -1;
	}
	rw_exit(&drmach_boards_rwlock);
	return (rv);
}

/* Format the attachment-point name ("SB<n>") for board bnum into buf. */
sbd_error_t *
drmach_board_name(int bnum, char *buf, int buflen)
{
	(void) snprintf(buf, buflen, "SB%d", bnum);
	return (NULL);
}

/*
 * Mark a board powered off.  Fails with EOPL_CONFIGBUSY if the board
 * is still configured or busy.
 */
sbd_error_t *
drmach_board_poweroff(drmachid_t id)
{
	drmach_board_t	*bp;
	sbd_error_t	*err;
	drmach_status_t	 stat;

	if (DRMACH_NULL_ID(id))
		return (NULL);

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	bp = id;

	err = drmach_board_status(id, &stat);

	if (!err) {
		if (stat.configured || stat.busy)
			err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
		else {
			bp->powered = 0;
		}
	}
	return (err);
}

/* Mark a board powered on.  No hardware action is taken here. */
sbd_error_t *
drmach_board_poweron(drmachid_t id)
{
	drmach_board_t	*bp;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	bp = id;

	bp->powered = 1;

	return (NULL);
}

/* Board release op: only validates the id; nothing to undo on OPL. */
static sbd_error_t *
drmach_board_release(drmachid_t id)
{
	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	return (NULL);
}
/* Board self-test: no-op on OPL. */
/*ARGSUSED*/
sbd_error_t *
drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
{
	return (NULL);
}

/*
 * Remove a board from the global board array and dispose of it,
 * unless it is still configured or busy.
 */
sbd_error_t *
drmach_board_unassign(drmachid_t id)
{
	drmach_board_t	*bp;
	sbd_error_t	*err;
	drmach_status_t	 stat;

	if (DRMACH_NULL_ID(id))
		return (NULL);

	if (!DRMACH_IS_BOARD_ID(id)) {
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	}
	bp = id;

	rw_enter(&drmach_boards_rwlock, RW_WRITER);

	err = drmach_board_status(id, &stat);
	if (err) {
		rw_exit(&drmach_boards_rwlock);
		return (err);
	}
	if (stat.configured || stat.busy) {
		err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
	} else {
		if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
			err = DRMACH_INTERNAL_ERROR();
		else
			drmach_board_dispose(bp);
	}
	rw_exit(&drmach_boards_rwlock);
	return (err);
}

/*
 * We have to do more on OPL - e.g. set up sram tte, read cpuid, strand id,
 * implementation #, etc
 */
static sbd_error_t *
drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
{
	static void drmach_cpu_dispose(drmachid_t);
	static sbd_error_t *drmach_cpu_release(drmachid_t);
	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);

	int		 portid;
	drmach_cpu_t	*cp = NULL;

	/* portid is CPUID of the node */
	portid = proto->portid;
	ASSERT(portid != -1);

	/* unum = (CMP/CHIP ID) + (ON_BOARD_CORE_NUM * MAX_CMPID_PER_BOARD) */
	proto->unum = ((portid/OPL_MAX_CPUID_PER_CMP) &
	    (OPL_MAX_CMPID_PER_BOARD - 1)) +
	    ((portid & (OPL_MAX_CPUID_PER_CMP - 1)) *
	    (OPL_MAX_CMPID_PER_BOARD));

	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
	bcopy(proto, &cp->dev, sizeof (cp->dev));
	cp->dev.node = drmach_node_dup(proto->node);
	cp->dev.cm.isa = (void *)drmach_cpu_new;
	cp->dev.cm.dispose = drmach_cpu_dispose;
	cp->dev.cm.release = drmach_cpu_release;
	cp->dev.cm.status = drmach_cpu_status;

	(void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
	    cp->dev.type, cp->dev.unum);

	/*
	 * CPU ID representation
	 * CPUID[9:5] = SB#
	 * CPUID[4:3] = Chip#
	 * CPUID[2:1] = Core# (Only 2 core for OPL)
	 * CPUID[0:0] = Strand#
	 */

	/*
	 * reg property of the strand contains strand ID
	 * reg property of the parent node contains core ID
	 * We should use them.
	 */
	cp->cpuid = portid;
	cp->sb = (portid >> 5) & 0x1f;
	cp->chipid = (portid >> 3) & 0x3;
	cp->coreid = (portid >> 1) & 0x3;
	cp->strandid = portid & 0x1;

	*idp = (drmachid_t)cp;

	return (NULL);
}

/* Free a drmach_cpu_t and its duplicated device-tree node. */
static void
drmach_cpu_dispose(drmachid_t id)
{
	drmach_cpu_t	*self;

	ASSERT(DRMACH_IS_CPU_ID(id));

	self = id;
	if (self->dev.node)
		drmach_node_dispose(self->dev.node);

	kmem_free(self, sizeof (*self));
}

/* Cold-start a slave CPU via restart_other_cpu(); cpu_lock must be held. */
static int
drmach_cpu_start(struct cpu *cp)
{
	int		cpuid = cp->cpu_id;
	extern int	restart_other_cpu(int);

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);

	cp->cpu_flags &= ~CPU_POWEROFF;

	/*
	 * NOTE: restart_other_cpu pauses cpus during the
	 *	slave cpu start.  This helps to quiesce the
	 *	bus traffic a bit which makes the tick sync
	 *	routine in the prom more robust.
	 */
	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);

	(void) restart_other_cpu(cpuid);

	return (0);
}

/* CPU release op: only validates the id; nothing to undo on OPL. */
static sbd_error_t *
drmach_cpu_release(drmachid_t id)
{
	if (!DRMACH_IS_CPU_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	return (NULL);
}

/*
 * Fill in the sbd status for a CPU: assigned/powered come from the
 * owning board, configured reflects cpu_get() under cpu_lock.
 */
static sbd_error_t *
drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
{
	drmach_cpu_t	*cp;
	drmach_device_t	*dp;

	ASSERT(DRMACH_IS_CPU_ID(id));
	cp = (drmach_cpu_t *)id;
	dp = &cp->dev;

	stat->assigned = dp->bp->assigned;
	stat->powered = dp->bp->powered;
	mutex_enter(&cpu_lock);
	stat->configured = (cpu_get(cp->cpuid) != NULL);
	mutex_exit(&cpu_lock);
	stat->busy = dp->busy;
	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
	stat->info[0] = '\0';

	return (NULL);
}

/* CPU disconnect: only validates the id; nothing to tear down on OPL. */
sbd_error_t *
drmach_cpu_disconnect(drmachid_t id)
{
	if (!DRMACH_IS_CPU_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	return (NULL);
}

/* Return the processor id stored in the drmach cpu object. */
sbd_error_t *
drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
{
	drmach_cpu_t *cpu;

	if (!DRMACH_IS_CPU_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	cpu = (drmach_cpu_t *)id;

	/* get from cpu directly on OPL */
	*cpuid = cpu->cpuid;
	return (NULL);
}
/*
 * Read the CPU implementation number from the parent (core) node's
 * "implementation#" property.
 */
sbd_error_t *
drmach_cpu_get_impl(drmachid_t id, int *ip)
{
	drmach_device_t	*cpu;
	drmach_node_t	*np;
	drmach_node_t	pp;
	int		impl;
	char		type[OBP_MAXPROPNAME];

	if (!DRMACH_IS_CPU_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	cpu = id;
	np = cpu->node;

	if (np->get_parent(np, &pp) != 0) {
		return (DRMACH_INTERNAL_ERROR());
	}

	/* the parent should be core */

	if (pp.n_getprop(&pp, "device_type", &type, sizeof (type)) != 0) {
		return (drerr_new(0, EOPL_GETPROP, NULL));
	}

	if (strcmp(type, OPL_CORE_NODE) == 0) {
		if (pp.n_getprop(&pp, "implementation#", &impl,
		    sizeof (impl)) != 0) {
			return (drerr_new(0, EOPL_GETPROP, NULL));
		}
	} else {
		return (DRMACH_INTERNAL_ERROR());
	}

	*ip = impl;

	return (NULL);
}

/* Return the dev_info node backing a drmach device. */
sbd_error_t *
drmach_get_dip(drmachid_t id, dev_info_t **dip)
{
	drmach_device_t	*dp;

	if (!DRMACH_IS_DEVICE_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	dp = id;

	*dip = dp->node->n_getdip(dp->node);
	return (NULL);
}

/*
 * Report whether an IO device is attached: either its node state is
 * at least DS_ATTACHED or its ddi devstate is DDI_DEVSTATE_UP.
 */
sbd_error_t *
drmach_io_is_attached(drmachid_t id, int *yes)
{
	drmach_device_t *dp;
	dev_info_t	*dip;
	int		state;

	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	dp = id;

	dip = dp->node->n_getdip(dp->node);
	if (dip == NULL) {
		*yes = 0;
		return (NULL);
	}

	state = ddi_get_devstate(dip);
	*yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
	    (state == DDI_DEVSTATE_UP));

	return (NULL);
}

/* Argument block for the ddi_walk_devs() search in drmach_console_ops(). */
struct drmach_io_cb {
	char	*name;	/* name of the node */
	int	(*func)(dev_info_t *);
	int	rv;
	dev_info_t *dip;
};

#define	DRMACH_IO_POST_ATTACH	0
#define	DRMACH_IO_PRE_RELEASE	1

/*
 * ddi_walk_devs callback: find the child whose "name" property matches
 * p->name, hold it, and terminate the walk.
 */
static int
drmach_io_cb_check(dev_info_t *dip, void *arg)
{
	struct drmach_io_cb *p = (struct drmach_io_cb *)arg;
	char name[OBP_MAXDRVNAME];
	int len = OBP_MAXDRVNAME;

	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "name",
	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
		return (DDI_WALK_PRUNECHILD);
	}

	if (strcmp(name, p->name) == 0) {
		ndi_hold_devi(dip);
		p->dip = dip;
		return (DDI_WALK_TERMINATE);
	}

	return (DDI_WALK_CONTINUE);
}

/*
 * Invoke the oplmsu serial-console attach/detach hook (looked up by
 * symbol at runtime) on the "serial" child of the pcicmu channel.
 * Returns the hook's result, -1 on lookup failure, or 0 when there is
 * nothing to do.
 */
static int
drmach_console_ops(drmachid_t *id, int state)
{
	drmach_io_t	*obj = (drmach_io_t *)id;
	struct drmach_io_cb arg;
	int (*msudetp)(dev_info_t *);
	int (*msuattp)(dev_info_t *);
	dev_info_t	*dip, *pdip;
	int		circ;

	/* 4 is pcicmu channel */
	if (obj->channel != 4)
		return (0);

	arg.name = "serial";
	arg.func = NULL;
	if (state == DRMACH_IO_PRE_RELEASE) {
		msudetp = (int (*)(dev_info_t *))
		    modgetsymvalue("oplmsu_dr_detach", 0);
		if (msudetp != NULL)
			arg.func = msudetp;
	} else if (state == DRMACH_IO_POST_ATTACH) {
		msuattp = (int (*)(dev_info_t *))
		    modgetsymvalue("oplmsu_dr_attach", 0);
		if (msuattp != NULL)
			arg.func = msuattp;
	} else {
		return (0);
	}

	if (arg.func == NULL) {
		return (0);
	}

	arg.rv = 0;
	arg.dip = NULL;

	dip = obj->dev.node->n_getdip(obj->dev.node);
	if (pdip = ddi_get_parent(dip)) {
		ndi_hold_devi(pdip);
		ndi_devi_enter(pdip, &circ);
	} else {
		/* this cannot happen unless something bad happens */
		return (-1);
	}

	ddi_walk_devs(dip, drmach_io_cb_check, (void *)&arg);

	ndi_devi_exit(pdip, circ);
	ndi_rele_devi(pdip);

	if (arg.dip) {
		arg.rv = (*arg.func)(arg.dip);
		ndi_rele_devi(arg.dip);
	} else {
		arg.rv = -1;
	}

	return (arg.rv);
}

/* Run the console pre-release hook; warn (but succeed) on failure. */
sbd_error_t *
drmach_io_pre_release(drmachid_t id)
{
	int	rv;

	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	rv = drmach_console_ops(id, DRMACH_IO_PRE_RELEASE);

	if (rv != 0)
		cmn_err(CE_WARN, "IO callback failed in pre-release\n");

	return (NULL);
}

/* IO release op: only validates the id; nothing to undo on OPL. */
static sbd_error_t *
drmach_io_release(drmachid_t id)
{
	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	return (NULL);
}

/* IO unrelease op: only validates the id; nothing to undo on OPL. */
sbd_error_t *
drmach_io_unrelease(drmachid_t id)
{
	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	return (NULL);
}

/*ARGSUSED*/
sbd_error_t *
drmach_io_post_release(drmachid_t id)
{
	return (NULL);
}

/* Run the console post-attach hook; warn (but succeed) on failure. */
/*ARGSUSED*/
sbd_error_t *
drmach_io_post_attach(drmachid_t id)
{
	int	rv;

	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	rv = drmach_console_ops(id, DRMACH_IO_POST_ATTACH);

	if (rv != 0)
		cmn_err(CE_WARN, "IO callback failed in post-attach\n");

	return (0);
}
/* Fill in the sbd status for an IO device. */
static sbd_error_t *
drmach_io_status(drmachid_t id, drmach_status_t *stat)
{
	drmach_device_t *dp;
	sbd_error_t	*err;
	int		 configured;

	ASSERT(DRMACH_IS_IO_ID(id));
	dp = id;

	err = drmach_io_is_attached(id, &configured);
	if (err)
		return (err);

	stat->assigned = dp->bp->assigned;
	stat->powered = dp->bp->powered;
	stat->configured = (configured != 0);
	stat->busy = dp->busy;
	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
	stat->info[0] = '\0';

	return (NULL);
}

/*
 * Create a drmach memory object for a node that carries an "mc-addr"
 * property.  Yields a NULL id (and no error) for nodes without a
 * memory controller or with no installed memory.
 */
static sbd_error_t *
drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
{
	static void drmach_mem_dispose(drmachid_t);
	static sbd_error_t *drmach_mem_release(drmachid_t);
	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);

	dev_info_t	*dip;
	int		rv;

	drmach_mem_t	*mp;

	rv = 0;

	/* Nodes without a usable mc-addr property are not of interest. */
	if ((proto->node->n_getproplen(proto->node, "mc-addr", &rv) < 0) ||
	    (rv <= 0)) {
		*idp = (drmachid_t)0;
		return (NULL);
	}

	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
	proto->unum = 0;

	bcopy(proto, &mp->dev, sizeof (mp->dev));
	mp->dev.node = drmach_node_dup(proto->node);
	mp->dev.cm.isa = (void *)drmach_mem_new;
	mp->dev.cm.dispose = drmach_mem_dispose;
	mp->dev.cm.release = drmach_mem_release;
	mp->dev.cm.status = drmach_mem_status;

	(void) snprintf(mp->dev.cm.name, sizeof (mp->dev.cm.name), "%s",
	    mp->dev.type);

	dip = mp->dev.node->n_getdip(mp->dev.node);
	if (drmach_setup_mc_info(dip, mp) != 0) {
		return (drerr_new(1, EOPL_MC_SETUP, NULL));
	}

	/* make sure we do not create memoryless nodes */
	if (mp->nbytes == 0) {
		*idp = (drmachid_t)NULL;
		kmem_free(mp, sizeof (drmach_mem_t));
	} else
		*idp = (drmachid_t)mp;

	return (NULL);
}

/* Free a drmach memory object, its node duplicate and its memlist. */
static void
drmach_mem_dispose(drmachid_t id)
{
	drmach_mem_t *mp;

	ASSERT(DRMACH_IS_MEM_ID(id));

	mp = id;

	if (mp->dev.node)
		drmach_node_dispose(mp->dev.node);

	if (mp->memlist) {
		memlist_delete(mp->memlist);
		mp->memlist = NULL;
	}

	kmem_free(mp, sizeof (*mp));
}

/* Offer [basepa, basepa+size) to the kernel cage. */
sbd_error_t *
drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
{
	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
	int		rv;

	ASSERT(size != 0);

	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
	if (rv == ENOMEM) {
		cmn_err(CE_WARN, "%lu megabytes not available to kernel cage",
		    (ulong_t)(size == 0 ? 0 : size / MBYTE));
	} else if (rv != 0) {
		/* catch this in debug kernels */
		ASSERT(0);

		cmn_err(CE_WARN, "unexpected kcage_range_add return value %d",
		    rv);
	}

	if (rv) {
		return (DRMACH_INTERNAL_ERROR());
	} else
		return (NULL);
}

/* Withdraw [basepa, basepa+size) from the kernel cage after deletion. */
sbd_error_t *
drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
{
	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
	int		rv;

	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	if (size > 0) {
		rv = kcage_range_delete_post_mem_del(basepfn, npages);
		if (rv != 0) {
			cmn_err(CE_WARN,
			    "unexpected kcage_range_delete_post_mem_del"
			    " return value %d", rv);
			return (DRMACH_INTERNAL_ERROR());
		}
	}

	return (NULL);
}

/* Disable a memory unit; flushes all caches first. */
sbd_error_t *
drmach_mem_disable(drmachid_t id)
{
	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	else {
		drmach_flush_all();
		return (NULL);
	}
}

/* Enable a memory unit: only validates the id on OPL. */
sbd_error_t *
drmach_mem_enable(drmachid_t id)
{
	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	else
		return (NULL);
}

/* Report base address, size, slice size and alignment of a memory unit. */
sbd_error_t *
drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
{
	drmach_mem_t *mp;

	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	mp = (drmach_mem_t *)id;

	/*
	 * This is only used by dr to round up/down the memory
	 * for copying.  Our unit of memory isolation is 64 MB.
*/
	mem->mi_alignment_mask = (64 * 1024 * 1024 - 1);
	mem->mi_basepa = mp->base_pa;
	mem->mi_size = mp->nbytes;
	mem->mi_slice_size = mp->slice_size;

	return (NULL);
}

/* Return the lowest installed physical address of the memory unit. */
sbd_error_t *
drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
{
	drmach_mem_t *mp;

	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	mp = (drmach_mem_t *)id;

	*pa = mp->base_pa;

	return (NULL);
}

/*
 * Hand the caller a duplicate of the unit's memlist.  DEBUG kernels
 * additionally verify it does not intersect phys_install.
 */
sbd_error_t *
drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
{
	drmach_mem_t	*mem;

#ifdef DEBUG
	int		rv;
#endif
	struct memlist	*mlist;

	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	mem = (drmach_mem_t *)id;
	mlist = memlist_dup(mem->memlist);

#ifdef DEBUG
	/*
	 * Make sure the incoming memlist doesn't already
	 * intersect with what's present in the system (phys_install).
	 */
	memlist_read_lock();
	rv = memlist_intersect(phys_install, mlist);
	memlist_read_unlock();
	if (rv) {
		DRMACH_PR("Derived memlist intersects with phys_install\n");
		memlist_dump(mlist);

		DRMACH_PR("phys_install memlist:\n");
		memlist_dump(phys_install);

		memlist_delete(mlist);
		return (DRMACH_INTERNAL_ERROR());
	}

	DRMACH_PR("Derived memlist:");
	memlist_dump(mlist);
#endif
	*ml = mlist;

	return (NULL);
}

/* Return the memory slice size for this unit. */
sbd_error_t *
drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
{
	drmach_mem_t	*mem;

	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	mem = (drmach_mem_t *)id;

	*bytes = mem->slice_size;

	return (NULL);
}

/* No CPU affinity constraint for memory operations on OPL. */
/* ARGSUSED */
processorid_t
drmach_mem_cpu_affinity(drmachid_t id)
{
	return (CPU_CURRENT);
}

/* Memory release op: only validates the id; nothing to undo on OPL. */
static sbd_error_t *
drmach_mem_release(drmachid_t id)
{
	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	return (NULL);
}

/*
 * Fill in sbd status for a memory unit; it is "configured" when any
 * phys_install span falls within the unit's slice.
 */
static sbd_error_t *
drmach_mem_status(drmachid_t id, drmach_status_t *stat)
{
	drmach_mem_t	*dp;
	uint64_t	pa, slice_size;
	struct memlist	*ml;

	ASSERT(DRMACH_IS_MEM_ID(id));
	dp = id;

	/* get starting physical address of target memory */
	pa = dp->base_pa;

	/* round down to slice boundary */
	slice_size = dp->slice_size;
	pa &= ~(slice_size - 1);

	/* stop at first span that is in slice */
	memlist_read_lock();
	for (ml = phys_install; ml; ml = ml->next)
		if (ml->address >= pa && ml->address < pa + slice_size)
			break;
	memlist_read_unlock();

	stat->assigned = dp->dev.bp->assigned;
	stat->powered = dp->dev.bp->powered;
	stat->configured = (ml != NULL);
	stat->busy = dp->dev.busy;
	(void) strncpy(stat->type, dp->dev.type, sizeof (stat->type));
	stat->info[0] = '\0';

	return (NULL);
}

/* Tear down a board's tree and device array on detach. */
sbd_error_t *
drmach_board_deprobe(drmachid_t id)
{
	drmach_board_t	*bp;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	bp = id;

	cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum);

	if (bp->tree) {
		drmach_node_dispose(bp->tree);
		bp->tree = NULL;
	}
	if (bp->devices) {
		drmach_array_dispose(bp->devices, drmach_device_dispose);
		bp->devices = NULL;
	}

	bp->boot_board = 0;

	return (NULL);
}

/* Passthru: in-kernel probe of a system board via opl_probe_sb(). */
/*ARGSUSED*/
static sbd_error_t *
drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
{
	drmach_board_t		*bp = (drmach_board_t *)id;
	sbd_error_t		*err = NULL;
	int	rv;
	unsigned cpu_impl;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	DRMACH_PR("calling opl_probe_board for bnum=%d\n", bp->bnum);
	rv = opl_probe_sb(bp->bnum, &cpu_impl);
	if (rv != 0) {
		err = drerr_new(1, EOPL_PROBE, bp->cm.name);
		return (err);
	}
	return (err);
}

/* Passthru: in-kernel unprobe of a system board via opl_unprobe_sb(). */
/*ARGSUSED*/
static sbd_error_t *
drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
{
	drmach_board_t	*bp;
	sbd_error_t	*err = NULL;
	int	rv;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	bp = (drmach_board_t *)id;

	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);

	rv = opl_unprobe_sb(bp->bnum);
	if (rv != 0) {
		err = drerr_new(1, EOPL_DEPROBE, bp->cm.name);
	}

	return (err);
}

/* Passthru: read every installed physical page (32 bytes at a time). */
/*ARGSUSED*/
sbd_error_t *
drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
{
	struct memlist	*ml;
	uint64_t	src_pa;
	uint64_t	dst_pa;
	uint64_t	dst;

	dst_pa = va_to_pa(&dst);

	memlist_read_lock();
	for (ml = phys_install; ml; ml = ml->next) {
		uint64_t	nbytes;

		src_pa = ml->address;
nbytes = ml->size;
		while (nbytes != 0ull) {

			/* copy 32 bytes at arc_pa to dst_pa */
			bcopy32_il(src_pa, dst_pa);

			/* increment by 32 bytes */
			src_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));
		}
	}
	memlist_read_unlock();

	return (NULL);
}

/* Table of passthru commands dispatched by drmach_passthru(). */
static struct {
	const char	*name;
	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
} drmach_pt_arr[] = {
	{ "readmem",		drmach_pt_readmem	},
	{ "ikprobe",		drmach_pt_ikprobe	},
	{ "ikdeprobe",		drmach_pt_ikdeprobe	},

	/* the following line must always be last */
	{ NULL,			NULL			}
};

/*
 * Match opts->copts against the passthru command table (by prefix)
 * and run the handler; unknown commands yield EOPL_UNKPTCMD.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_passthru(drmachid_t id, drmach_opts_t *opts)
{
	int		i;
	sbd_error_t	*err;

	i = 0;
	while (drmach_pt_arr[i].name != NULL) {
		int len = strlen(drmach_pt_arr[i].name);

		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
			break;

		i += 1;
	}

	if (drmach_pt_arr[i].name == NULL)
		err = drerr_new(0, EOPL_UNKPTCMD, opts->copts);
	else
		err = (*drmach_pt_arr[i].handler)(id, opts);

	return (err);
}

/* Dispatch to the object's release method. */
sbd_error_t *
drmach_release(drmachid_t id)
{
	drmach_common_t *cp;

	if (!DRMACH_IS_DEVICE_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));
	cp = id;

	return (cp->release(id));
}

/* Dispatch to the object's status method under the boards rwlock. */
sbd_error_t *
drmach_status(drmachid_t id, drmach_status_t *stat)
{
	drmach_common_t *cp;
	sbd_error_t	*err;

	rw_enter(&drmach_boards_rwlock, RW_READER);

	if (!DRMACH_IS_ID(id)) {
		rw_exit(&drmach_boards_rwlock);
		return (drerr_new(0, EOPL_NOTID, NULL));
	}
	cp = (drmach_common_t *)id;
	err = cp->status(id, stat);

	rw_exit(&drmach_boards_rwlock);

	return (err);
}

/* Internal status dispatch; caller is expected to hold needed locks. */
static sbd_error_t *
drmach_i_status(drmachid_t id, drmach_status_t *stat)
{
	drmach_common_t *cp;

	if (!DRMACH_IS_ID(id))
		return (drerr_new(0, EOPL_NOTID, NULL));
	cp = id;

	return (cp->status(id, stat));
}

/*
 * Unconfigure a device's devinfo branch.  CPUs are a no-op here; on
 * failure the offending path is reported via EOPL_DRVFAIL.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_unconfigure(drmachid_t id, int flags)
{
	drmach_device_t *dp;
	dev_info_t	*rdip, *fdip = NULL;
	char	name[OBP_MAXDRVNAME];
	int	rv;

	if (DRMACH_IS_CPU_ID(id))
		return (NULL);

	if (!DRMACH_IS_DEVICE_ID(id))
		return (drerr_new(0, EOPL_INAPPROP, NULL));

	dp = id;
	rdip = dp->node->n_getdip(dp->node);

	ASSERT(rdip);

	rv = dp->node->n_getprop(dp->node, "name", name, OBP_MAXDRVNAME);

	if (rv)
		return (NULL);

	/*
	 * Note: FORCE flag is no longer necessary under devfs
	 */
	ASSERT(e_ddi_branch_held(rdip));
	if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
		sbd_error_t	*err;
		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		/*
		 * If non-NULL, fdip is returned held and must be released.
		 */
		if (fdip != NULL) {
			(void) ddi_pathname(fdip, path);
			ndi_rele_devi(fdip);
		} else {
			(void) ddi_pathname(rdip, path);
		}

		err = drerr_new(1, EOPL_DRVFAIL, path);

		kmem_free(path, MAXPATHLEN);

		return (err);
	}

	return (NULL);
}

/*
 * Power a CPU strand on: hot-add its CMP if this is the first strand,
 * then cold-start it.  Returns 0, EIO (hotadd failed) or EBUSY (start
 * failed).  Caller holds cpu_lock.
 */
int
drmach_cpu_poweron(struct cpu *cp)
{
	int		bnum, cpuid, onb_core_num, strand_id;
	drmach_board_t	*bp;

	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);

	cpuid = cp->cpu_id;
	bnum = LSB_ID(cpuid);
	onb_core_num = ON_BOARD_CORE_NUM(cpuid);
	strand_id = STRAND_ID(cpuid);
	bp = drmach_get_board_by_bnum(bnum);

	ASSERT(bp);

	if (bp->cores[onb_core_num].core_hotadded == 0) {
		if (drmach_add_remove_cpu(bnum, onb_core_num,
		    HOTADD_CPU) != 0) {
			cmn_err(CE_WARN, "Failed to add CMP %d on board %d\n",
			    onb_core_num, bnum);
			return (EIO);
		}
	}

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (drmach_cpu_start(cp) != 0) {
		if (bp->cores[onb_core_num].core_started == 0) {
			/*
			 * we must undo the hotadd or no one will do that
			 * If this fails, we will do this again in
			 * drmach_board_disconnect.
*/
			if (drmach_add_remove_cpu(bnum, onb_core_num,
			    HOTREMOVE_CPU) != 0) {
				cmn_err(CE_WARN, "Failed to remove CMP %d "
				    "on board %d\n", onb_core_num, bnum);
			}
		}
		return (EBUSY);
	} else {
		/* record this strand as successfully started */
		bp->cores[onb_core_num].core_started |= (1 << strand_id);
		return (0);
	}
}

/*
 * Power a CPU strand off: pause/quiesce it, stop it via the prom, and
 * hot-remove the CMP when its last strand goes away.  Caller holds
 * cpu_lock.  Returns 0 on success or an errno value.
 */
int
drmach_cpu_poweroff(struct cpu *cp)
{
	int		rv = 0;
	processorid_t	cpuid = cp->cpu_id;

	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Capture all CPUs (except for detaching proc) to prevent
	 * crosscalls to the detaching proc until it has cleared its
	 * bit in cpu_ready_set.
	 *
	 * The CPU's remain paused and the prom_mutex is known to be free.
	 * This prevents the x-trap victim from blocking when doing prom
	 * IEEE-1275 calls at a high PIL level.
	 */
	promsafe_pause_cpus();

	/*
	 * Quiesce interrupts on the target CPU. We do this by setting
	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
	 * prevent it from receiving cross calls and cross traps.
	 * This prevents the processor from receiving any new soft interrupts.
	 */
	mp_cpu_quiesce(cp);

	rv = prom_stopcpu_bycpuid(cpuid);
	if (rv == 0)
		cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;

	start_cpus();

	if (rv == 0) {
		int	bnum, onb_core_num, strand_id;
		drmach_board_t	*bp;

		CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);

		bnum = LSB_ID(cpuid);
		onb_core_num = ON_BOARD_CORE_NUM(cpuid);
		strand_id = STRAND_ID(cpuid);
		bp = drmach_get_board_by_bnum(bnum);

		ASSERT(bp);

		bp->cores[onb_core_num].core_started &= ~(1 << strand_id);
		if (bp->cores[onb_core_num].core_started == 0) {
			if (drmach_add_remove_cpu(bnum, onb_core_num,
			    HOTREMOVE_CPU) != 0) {
				cmn_err(CE_WARN, "Failed to remove CMP %d LSB "
				    "%d\n", onb_core_num, bnum);
				return (EIO);
			}
		}
	}

	return (rv);
}

/* Suspend/resume verification hook: nothing to check on OPL. */
/*ARGSUSED*/
int
drmach_verify_sr(dev_info_t *dip, int sflag)
{
	return (0);
}

void
drmach_suspend_last(void)
{
}

void
drmach_resume_first(void)
{
}

/*
 * Log a DR sysevent.
 * Return value: 0 success, non-zero failure.
 */
int
drmach_log_sysevent(int board, char *hint, int flag, int verbose)
{
	sysevent_t		*ev;
	sysevent_id_t		eid;
	int			rv, km_flag;
	sysevent_value_t	evnt_val;
	sysevent_attr_list_t	*evnt_attr_list = NULL;
	char			attach_pnt[MAXNAMELEN];

	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
	attach_pnt[0] = '\0';
	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
		rv = -1;
		goto logexit;
	}
	if (verbose) {
		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
		    attach_pnt, hint, flag, verbose);
	}

	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
	    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
		rv = -2;
		goto logexit;
	}
	evnt_val.value_type = SE_DATA_TYPE_STRING;
	evnt_val.value.sv_string = attach_pnt;
	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val,
	    km_flag)) != 0)
		goto logexit;

	evnt_val.value_type = SE_DATA_TYPE_STRING;
	evnt_val.value.sv_string = hint;
	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT, &evnt_val,
	    km_flag)) != 0) {
		sysevent_free_attr(evnt_attr_list);
		goto logexit;
	}

	(void) sysevent_attach_attributes(ev, evnt_attr_list);

	/*
	 * Log the event but do not sleep waiting for its
	 * delivery. This provides insulation from syseventd.
	 */
	rv = log_sysevent(ev, SE_NOSLEEP, &eid);

logexit:
	if (ev)
		sysevent_free(ev);
	if ((rv != 0) && verbose)
		cmn_err(CE_WARN, "drmach_log_sysevent failed (rv %d) for %s "
		    " %s\n", rv, attach_pnt, hint);

	return (rv);
}

#define	OPL_DR_STATUS_PROP "dr-status"

/*
 * Returns 0 when DR is enabled on this domain (the root node carries a
 * readable "dr-status" property), non-zero otherwise.
 */
static int
opl_check_dr_status()
{
	pnode_t	node;
	int	rtn, len;
	char	*str;

	node = prom_rootnode();
	if (node == OBP_BADNODE) {
		return (1);
	}

	len = prom_getproplen(node, OPL_DR_STATUS_PROP);
	if (len == -1) {
		/*
		 * dr-status doesn't exist when DR is activated and
		 * any warning messages aren't needed.
*/
		return (1);
	}

	str = (char *)kmem_zalloc(len+1, KM_SLEEP);
	rtn = prom_getprop(node, OPL_DR_STATUS_PROP, str);
	kmem_free(str, len + 1);
	if (rtn == -1) {
		return (1);
	} else {
		return (0);
	}
}

/* we are allocating memlist from TLB locked pages to avoid tlbmisses */

/*
 * Insert span [base, base+len) into 'mlist', keeping the list sorted
 * and coalescing with adjacent/overlapping spans.  New elements come
 * from the program's pre-allocated free list (p->free_mlist); returns
 * NULL when len is 0 or the free list is exhausted.
 */
static struct memlist *
drmach_memlist_add_span(drmach_copy_rename_program_t *p,
	struct memlist *mlist, uint64_t base, uint64_t len)
{
	struct memlist	*ml, *tl, *nl;

	if (len == 0ull)
		return (NULL);

	if (mlist == NULL) {
		mlist = p->free_mlist;
		if (mlist == NULL)
			return (NULL);
		p->free_mlist = mlist->next;
		mlist->address = base;
		mlist->size = len;
		mlist->next = mlist->prev = NULL;

		return (mlist);
	}

	for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
		if (base < ml->address) {
			if ((base + len) < ml->address) {
				/* disjoint: link a new element before ml */
				nl = p->free_mlist;
				if (nl == NULL)
					return (NULL);
				p->free_mlist = nl->next;
				nl->address = base;
				nl->size = len;
				nl->next = ml;
				if ((nl->prev = ml->prev) != NULL)
					nl->prev->next = nl;
				ml->prev = nl;
				if (mlist == ml)
					mlist = nl;
			} else {
				/* overlaps the front of ml: extend ml */
				ml->size = MAX((base + len),
				    (ml->address + ml->size)) - base;
				ml->address = base;
			}
			break;

		} else if (base <= (ml->address + ml->size)) {
			/* overlaps or abuts the end of ml: coalesce */
			ml->size = MAX((base + len),
			    (ml->address + ml->size)) -
			    MIN(ml->address, base);
			ml->address = MIN(ml->address, base);
			break;
		}
	}
	if (ml == NULL) {
		/* past the end of the list: append a new element */
		nl = p->free_mlist;
		if (nl == NULL)
			return (NULL);
		p->free_mlist = nl->next;
		nl->address = base;
		nl->size = len;
		nl->next = NULL;
		nl->prev = tl;
		tl->next = nl;
	}

	return (mlist);
}

/*
 * The routine performs the necessary memory COPY and MC adr SWITCH.
 * Both operations MUST be at the same "level" so that the stack is
 * maintained correctly between the copy and switch.  The switch
 * portion implements a caching mechanism to guarantee the code text
 * is cached prior to execution.  This is to guard against possible
 * memory access while the MC adr's are being modified.
 *
 * IMPORTANT: The _drmach_copy_rename_end() function must immediately
 * follow drmach_copy_rename_prog__relocatable() so that the correct
 * "length" of the drmach_copy_rename_prog__relocatable can be
 * calculated.  This routine MUST be a LEAF function, i.e. it can
 * make NO function calls, primarily for two reasons:
 *
 *	1. We must keep the stack consistent across the "switch".
 *	2. Function calls are compiled to relative offsets, and
 *	   we execute this function we'll be executing it from
 *	   a copied version in a different area of memory, thus
 *	   the relative offsets will be bogus.
 *
 * Moreover, it must have the "__relocatable" suffix to inform DTrace
 * providers (and anything else, for that matter) that this
 * function's text is manually relocated elsewhere before it is
 * executed.  That is, it cannot be safely instrumented with any
 * methodology that is PC-relative.
 */

/*
 * We multiply this to system_clock_frequency so we
 * are setting a delay of fmem_timeout second for
 * the rename command.
 *
 * FMEM command itself should complete within 15 sec.
 * We add 2 more sec to be conservative.
 *
 * Note that there is also a SCF BUSY bit checking
 * in drmach_asm.s right before FMEM command is
 * issued.  XSCF sets the SCF BUSY bit when the
 * other domain on the same PSB reboots and it
 * will not be able to service the FMEM command
 * within 15 sec.  After setting the SCF BUSY
 * bit, XSCF will wait a while before servicing
 * other reboot command so there is no race
 * condition.
 */
static int	fmem_timeout = 17;

/*
 * The empirical data on some OPL system shows that
 * we can copy 250 MB per second.  We set it to
 * 80 MB to be conservative.  In normal case,
 * this timeout does not affect anything.
 */
static int	min_copy_size_per_sec = 80 * 1024 * 1024;

/*
 * This is the timeout value for the xcall synchronization
 * to get all the CPU ready to do the parallel copying.
 * Even on a fully loaded system, 10 sec. should be long
 * enough.
 */
static int	cpu_xcall_delay = 10;
int drmach_disable_mcopy = 0;

/*
 * The following delay loop executes sleep instruction to yield the
 * CPU to other strands.  If this is not done, some strand will tie
 * up the CPU in busy loops while the other strand cannot do useful
 * work.  The copy procedure will take a much longer time without this.
 */
#define	DR_DELAY_IL(ms, freq)					\
	{							\
		uint64_t start;					\
		uint64_t nstick;				\
		volatile uint64_t now;				\
		nstick = ((uint64_t)ms * freq)/1000;		\
		start = drmach_get_stick_il();			\
		now = start;					\
		while ((now - start) <= nstick) {		\
			drmach_sleep_il();			\
			now = drmach_get_stick_il();		\
		}						\
	}

/* Each loop is 2ms, timeout at 1000ms */
static int drmach_copy_rename_timeout = 500;

/*
 * Relocatable FMEM copy/rename body executed on every participating
 * CPU (master coordinates; slaves copy and then spin in the loop
 * script).  LEAF function - see the block comment above.
 */
static int
drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog,
	int cpuid)
{
	struct memlist		*ml;
	register int		rtn;
	int			i;
	register uint64_t	curr, limit;
	extern uint64_t		drmach_get_stick_il();
	extern void		membar_sync_il();
	extern void		flush_instr_mem_il(void*);
	extern void		flush_windows_il(void);
	uint64_t		copy_start;

	/*
	 * flush_windows is moved here to make sure all
	 * registers used in the callers are flushed to
	 * memory before the copy.
	 *
	 * If flush_windows() is called too early in the
	 * calling function, the compiler might put some
	 * data in the local registers after flush_windows().
	 * After FMA, if there is any fill trap, the registers
	 * will contain stale data.
*/
	flush_windows_il();

	prog->critical->stat[cpuid] = FMEM_LOOP_COPY_READY;
	membar_sync_il();

	if (prog->data->cpuid == cpuid) {
		/* master: wait for every slave to reach COPY_READY */
		limit = drmach_get_stick_il();
		limit += cpu_xcall_delay * system_clock_freq;
		for (i = 0; i < NCPU; i++) {
			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
				/* wait for all CPU's to be ready */
				for (;;) {
					if (prog->critical->stat[i] ==
					    FMEM_LOOP_COPY_READY) {
						break;
					}
					DR_DELAY_IL(1, prog->data->stick_freq);
				}
				curr = drmach_get_stick_il();
				if (curr > limit) {
					prog->data->fmem_status.error =
					    EOPL_FMEM_XC_TIMEOUT;
					return (EOPL_FMEM_XC_TIMEOUT);
				}
			}
		}
		prog->data->fmem_status.stat = FMEM_LOOP_COPY_READY;
		membar_sync_il();
		copy_start = drmach_get_stick_il();
	} else {
		/* slave: wait for the master's COPY_READY broadcast */
		for (;;) {
			if (prog->data->fmem_status.stat ==
			    FMEM_LOOP_COPY_READY) {
				break;
			}
			if (prog->data->fmem_status.error) {
				prog->data->error[cpuid] = EOPL_FMEM_TERMINATE;
				return (EOPL_FMEM_TERMINATE);
			}
			DR_DELAY_IL(1, prog->data->stick_freq);
		}
	}

	/*
	 * DO COPY.
	 */
	if (CPU_IN_SET(prog->data->cpu_copy_set, cpuid)) {
		for (ml = prog->data->cpu_ml[cpuid]; ml; ml = ml->next) {
			uint64_t	s_pa, t_pa;
			uint64_t	nbytes;

			s_pa = prog->data->s_copybasepa + ml->address;
			t_pa = prog->data->t_copybasepa + ml->address;
			nbytes = ml->size;

			while (nbytes != 0ull) {
				/*
				 * If the master has detected error, we just
				 * bail out
				 */
				if (prog->data->fmem_status.error !=
				    ESBD_NOERROR) {
					prog->data->error[cpuid] =
					    EOPL_FMEM_TERMINATE;
					return (EOPL_FMEM_TERMINATE);
				}
				/*
				 * This copy does NOT use an ASI
				 * that avoids the Ecache, therefore
				 * the dst_pa addresses may remain
				 * in our Ecache after the dst_pa
				 * has been removed from the system.
				 * A subsequent write-back to memory
				 * will cause an ARB-stop because the
				 * physical address no longer exists
				 * in the system.  Therefore we must
				 * flush out local Ecache after we
				 * finish the copy.
				 */
				/* copy 32 bytes at src_pa to dst_pa */
				bcopy32_il(s_pa, t_pa);

				/*
				 * increment the counter to signal that we are
				 * alive
				 */
				prog->stat->nbytes[cpuid] += 32;

				/* increment by 32 bytes */
				s_pa += (4 * sizeof (uint64_t));
				t_pa += (4 * sizeof (uint64_t));

				/* decrement by 32 bytes */
				nbytes -= (4 * sizeof (uint64_t));
			}
		}
		prog->critical->stat[cpuid] = FMEM_LOOP_COPY_DONE;
		membar_sync_il();
	}

	/*
	 * Since bcopy32_il() does NOT use an ASI to bypass
	 * the Ecache, we need to flush our Ecache after
	 * the copy is complete.
	 */
	flush_cache_il();

	/*
	 * drmach_fmem_exec_script()
	 */
	if (prog->data->cpuid == cpuid) {
		uint64_t	last, now;

		/* master: wait for slaves, monitoring copy progress */
		limit = copy_start + prog->data->copy_delay;
		for (i = 0; i < NCPU; i++) {
			if (!CPU_IN_SET(prog->data->cpu_slave_set, i))
				continue;

			for (;;) {
				/*
				 * we get FMEM_LOOP_FMEM_READY in
				 * normal case
				 */
				if (prog->critical->stat[i] ==
				    FMEM_LOOP_FMEM_READY) {
					break;
				}
				/* got error traps */
				if (prog->data->error[i] ==
				    EOPL_FMEM_COPY_ERROR) {
					prog->data->fmem_status.error =
					    EOPL_FMEM_COPY_ERROR;
					return (EOPL_FMEM_COPY_ERROR);
				}
				/*
				 * if we have not reached limit, wait
				 * more
				 */
				curr = drmach_get_stick_il();
				if (curr <= limit)
					continue;

				prog->data->slowest_cpuid = i;
				prog->data->copy_wait_time = curr - copy_start;

				/* now check if slave is alive */
				last = prog->stat->nbytes[i];

				DR_DELAY_IL(1, prog->data->stick_freq);

				now = prog->stat->nbytes[i];
				if (now <= last) {
					/*
					 * no progress, perhaps just
					 * finished
					 */
					DR_DELAY_IL(1, prog->data->stick_freq);
					if (prog->critical->stat[i] ==
					    FMEM_LOOP_FMEM_READY)
						break;
					/* copy error */
					if (prog->data->error[i] ==
					    EOPL_FMEM_COPY_ERROR) {
						prog->data->fmem_status.error =
						    EOPL_FMEM_COPY_ERROR;
						return (EOPL_FMEM_COPY_ERROR);
					}

					prog->data->copy_rename_count++;
					if (prog->data->copy_rename_count
					    < drmach_copy_rename_timeout) {
						continue;
					} else {
						prog->data->fmem_status.error =
						    EOPL_FMEM_COPY_TIMEOUT;
						return (EOPL_FMEM_COPY_TIMEOUT);
					}
				}
			}
		}

		prog->critical->stat[cpuid] = FMEM_LOOP_FMEM_READY;
		prog->data->fmem_status.stat = FMEM_LOOP_FMEM_READY;

		membar_sync_il();
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_exec_script()
		 */
		rtn = prog->critical->fmem((void *)prog->critical, PAGESIZE);
		return (rtn);
	} else {
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_loop_script()
		 */
		rtn = prog->critical->loop((void *)(prog->critical), PAGESIZE,
		    (void *)&(prog->critical->stat[cpuid]));
		prog->data->error[cpuid] = rtn;
		/* slave thread does not care the rv */
		return (0);
	}
}

static void
drmach_copy_rename_end(void)
{
	/*
	 * IMPORTANT:	This function's location MUST be located immediately
	 *		following drmach_copy_rename_prog__relocatable to
	 *		accurately estimate its size.  Note that this assumes
	 *		the compiler keeps these functions in the order in
	 *		which they appear :-o
	 */
}

/*
 * Carve the program's one-page memlist_buffer into a free list of
 * struct memlist elements; returns how many elements were created.
 */
static int
drmach_setup_memlist(drmach_copy_rename_program_t *p)
{
	struct memlist	*ml;
	caddr_t		buf;
	int		nbytes, s, n_elements;

	nbytes = PAGESIZE;
	n_elements = 0;
	s = roundup(sizeof (struct memlist), sizeof (void *));
	p->free_mlist = NULL;
	buf = p->memlist_buffer;
	while (nbytes >= sizeof (struct memlist)) {
		ml = (struct memlist *)buf;
		ml->next = p->free_mlist;
		p->free_mlist = ml;
		buf += s;
		n_elements++;
		nbytes -= s;
	}
	return (n_elements);
}

/*
 * Map the critical pages at new_va with locked TLB entries so that no
 * TLB miss can occur while the relocated code runs.  Disables
 * preemption; paired with drmach_unlock_critical().
 */
static void
drmach_lock_critical(caddr_t va, caddr_t new_va)
{
	tte_t tte;
	int i;

	kpreempt_disable();

	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
		vtag_flushpage(new_va, (uint64_t)ksfmmup);
		sfmmu_memtte(&tte, va_to_pfn(va), PROC_DATA|HAT_NOSYNC, TTE8K);
		tte.tte_intlo |= TTE_LCK_INT;
		sfmmu_dtlb_ld_kva(new_va, &tte);
		sfmmu_itlb_ld_kva(new_va, &tte);
		va += PAGESIZE;
		new_va += PAGESIZE;
	}
}

/* Undo drmach_lock_critical(): flush the locked entries, re-enable preemption. */
static void
drmach_unlock_critical(caddr_t va)
{
	int i;

	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
		vtag_flushpage(va, (uint64_t)ksfmmup);
		va += PAGESIZE;
	}

	kpreempt_enable();
}

sbd_error_t *
drmach_copy_rename_init(drmachid_t t_id, drmachid_t s_id,
	struct memlist *c_ml, drmachid_t *pgm_id)
{
	drmach_mem_t	*s_mem;
	drmach_mem_t	*t_mem;
	struct memlist	*x_ml;
	uint64_t
s_copybasepa, t_copybasepa; uint_t len; caddr_t bp, wp; int s_bd, t_bd, cpuid, active_cpus, i; int max_elms, mlist_size, rv; uint64_t c_addr; size_t c_size, copy_sz, sz; extern void drmach_fmem_loop_script(); extern void drmach_fmem_loop_script_rtn(); extern int drmach_fmem_exec_script(); extern void drmach_fmem_exec_script_end(); sbd_error_t *err; drmach_copy_rename_program_t *prog = NULL; drmach_copy_rename_program_t *prog_kmem = NULL; void (*mc_suspend)(void); void (*mc_resume)(void); int (*scf_fmem_start)(int, int); int (*scf_fmem_end)(void); int (*scf_fmem_cancel)(void); uint64_t (*scf_get_base_addr)(void); if (!DRMACH_IS_MEM_ID(s_id)) return (drerr_new(0, EOPL_INAPPROP, NULL)); if (!DRMACH_IS_MEM_ID(t_id)) return (drerr_new(0, EOPL_INAPPROP, NULL)); for (i = 0; i < NCPU; i++) { int lsb_id, onb_core_num, strand_id; drmach_board_t *bp; /* * this kind of CPU will spin in cache */ if (CPU_IN_SET(cpu_ready_set, i)) continue; /* * Now check for any inactive CPU's that * have been hotadded. This can only occur in * error condition in drmach_cpu_poweron(). 
*/ lsb_id = LSB_ID(i); onb_core_num = ON_BOARD_CORE_NUM(i); strand_id = STRAND_ID(i); bp = drmach_get_board_by_bnum(lsb_id); if (bp == NULL) continue; if (bp->cores[onb_core_num].core_hotadded & (1 << strand_id)) { if (!(bp->cores[onb_core_num].core_started & (1 << strand_id))) { return (drerr_new(1, EOPL_CPU_STATE, NULL)); } } } mc_suspend = (void (*)(void)) modgetsymvalue("opl_mc_suspend", 0); mc_resume = (void (*)(void)) modgetsymvalue("opl_mc_resume", 0); if (mc_suspend == NULL || mc_resume == NULL) { return (drerr_new(1, EOPL_MC_OPL, NULL)); } scf_fmem_start = (int (*)(int, int)) modgetsymvalue("scf_fmem_start", 0); if (scf_fmem_start == NULL) { return (drerr_new(1, EOPL_SCF_FMEM, NULL)); } scf_fmem_end = (int (*)(void)) modgetsymvalue("scf_fmem_end", 0); if (scf_fmem_end == NULL) { return (drerr_new(1, EOPL_SCF_FMEM, NULL)); } scf_fmem_cancel = (int (*)(void)) modgetsymvalue("scf_fmem_cancel", 0); if (scf_fmem_cancel == NULL) { return (drerr_new(1, EOPL_SCF_FMEM, NULL)); } scf_get_base_addr = (uint64_t (*)(void)) modgetsymvalue("scf_get_base_addr", 0); if (scf_get_base_addr == NULL) { return (drerr_new(1, EOPL_SCF_FMEM, NULL)); } s_mem = s_id; t_mem = t_id; s_bd = s_mem->dev.bp->bnum; t_bd = t_mem->dev.bp->bnum; /* calculate source and target base pa */ s_copybasepa = s_mem->slice_base; t_copybasepa = t_mem->slice_base; /* adjust copy memlist addresses to be relative to copy base pa */ x_ml = c_ml; mlist_size = 0; while (x_ml != NULL) { x_ml->address -= s_copybasepa; x_ml = x_ml->next; mlist_size++; } /* * bp will be page aligned, since we're calling * kmem_zalloc() with an exact multiple of PAGESIZE. */ prog_kmem = (drmach_copy_rename_program_t *)kmem_zalloc( DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, KM_SLEEP); prog_kmem->prog = prog_kmem; /* * To avoid MTLB hit, we allocate a new VM space and remap * the kmem_alloc buffer to that address. This solves * 2 problems we found: * - the kmem_alloc buffer can be just a chunk inside * a much larger, e.g. 
4MB buffer and MTLB will occur * if there are both a 4MB and a 8K TLB mapping to * the same VA range. * - the kmem mapping got dropped into the TLB by other * strands, unintentionally. * Note that the pointers like data, critical, memlist_buffer, * and stat inside the copy rename structure are mapped to this * alternate VM space so we must make sure we lock the TLB mapping * whenever we access data pointed to by these pointers. */ prog = prog_kmem->locked_prog = vmem_alloc(heap_arena, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, VM_SLEEP); wp = bp = (caddr_t)prog; /* Now remap prog_kmem to prog */ drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog); /* All pointers in prog are based on the alternate mapping */ prog->data = (drmach_copy_rename_data_t *)roundup(((uint64_t)prog + sizeof (drmach_copy_rename_program_t)), sizeof (void *)); ASSERT(((uint64_t)prog->data + sizeof (drmach_copy_rename_data_t)) <= ((uint64_t)prog + PAGESIZE)); prog->critical = (drmach_copy_rename_critical_t *) (wp + DRMACH_FMEM_CRITICAL_PAGE * PAGESIZE); prog->memlist_buffer = (caddr_t)(wp + DRMACH_FMEM_MLIST_PAGE * PAGESIZE); prog->stat = (drmach_cr_stat_t *)(wp + DRMACH_FMEM_STAT_PAGE * PAGESIZE); /* LINTED */ ASSERT(sizeof (drmach_cr_stat_t) <= ((DRMACH_FMEM_LOCKED_PAGES - DRMACH_FMEM_STAT_PAGE) * PAGESIZE)); prog->critical->scf_reg_base = (uint64_t)-1; prog->critical->scf_td[0] = (s_bd & 0xff); prog->critical->scf_td[1] = (t_bd & 0xff); for (i = 2; i < 15; i++) { prog->critical->scf_td[i] = 0; } prog->critical->scf_td[15] = ((0xaa + s_bd + t_bd) & 0xff); bp = (caddr_t)prog->critical; len = sizeof (drmach_copy_rename_critical_t); wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *)); len = (uint_t)((ulong_t)drmach_copy_rename_end - (ulong_t)drmach_copy_rename_prog__relocatable); /* * We always leave 1K nop's to prevent the processor from * speculative execution that causes memory access */ wp = wp + len + 1024; len = (uint_t)((ulong_t)drmach_fmem_exec_script_end - 
(ulong_t)drmach_fmem_exec_script); /* this is the entry point of the loop script */ wp = wp + len + 1024; len = (uint_t)((ulong_t)drmach_fmem_exec_script - (ulong_t)drmach_fmem_loop_script); wp = wp + len + 1024; /* now we make sure there is 1K extra */ if ((wp - bp) > PAGESIZE) { err = drerr_new(1, EOPL_FMEM_SETUP, NULL); goto out; } bp = (caddr_t)prog->critical; len = sizeof (drmach_copy_rename_critical_t); wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *)); prog->critical->run = (int (*)())(wp); len = (uint_t)((ulong_t)drmach_copy_rename_end - (ulong_t)drmach_copy_rename_prog__relocatable); bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len); wp = (caddr_t)roundup((uint64_t)wp + len, 1024); prog->critical->fmem = (int (*)())(wp); len = (int)((ulong_t)drmach_fmem_exec_script_end - (ulong_t)drmach_fmem_exec_script); bcopy((caddr_t)drmach_fmem_exec_script, wp, len); len = (int)((ulong_t)drmach_fmem_exec_script_end - (ulong_t)drmach_fmem_exec_script); wp = (caddr_t)roundup((uint64_t)wp + len, 1024); prog->critical->loop = (int (*)())(wp); len = (int)((ulong_t)drmach_fmem_exec_script - (ulong_t)drmach_fmem_loop_script); bcopy((caddr_t)drmach_fmem_loop_script, (void *)wp, len); len = (int)((ulong_t)drmach_fmem_loop_script_rtn- (ulong_t)drmach_fmem_loop_script); prog->critical->loop_rtn = (void (*)()) (wp+len); prog->data->fmem_status.error = ESBD_NOERROR; /* now we are committed, call SCF, soft suspend mac patrol */ if ((*scf_fmem_start)(s_bd, t_bd)) { err = drerr_new(1, EOPL_SCF_FMEM_START, NULL); goto out; } prog->data->scf_fmem_end = scf_fmem_end; prog->data->scf_fmem_cancel = scf_fmem_cancel; prog->data->scf_get_base_addr = scf_get_base_addr; prog->data->fmem_status.op |= OPL_FMEM_SCF_START; /* soft suspend mac patrol */ (*mc_suspend)(); prog->data->fmem_status.op |= OPL_FMEM_MC_SUSPEND; prog->data->mc_resume = mc_resume; prog->critical->inst_loop_ret = *(uint64_t *)(prog->critical->loop_rtn); /* * 0x30800000 is op code "ba,a +0" */ *(uint_t 
*)(prog->critical->loop_rtn) = (uint_t)(0x30800000); /* * set the value of SCF FMEM TIMEOUT */ prog->critical->delay = fmem_timeout * system_clock_freq; prog->data->s_mem = (drmachid_t)s_mem; prog->data->t_mem = (drmachid_t)t_mem; cpuid = CPU->cpu_id; prog->data->cpuid = cpuid; prog->data->cpu_ready_set = cpu_ready_set; prog->data->cpu_slave_set = cpu_ready_set; prog->data->slowest_cpuid = (processorid_t)-1; prog->data->copy_wait_time = 0; prog->data->copy_rename_count = 0; CPUSET_DEL(prog->data->cpu_slave_set, cpuid); for (i = 0; i < NCPU; i++) { prog->data->cpu_ml[i] = NULL; } /* * max_elms - max number of memlist structures that * may be allocated for the CPU memory list. * If there are too many memory span (because * of fragmentation) than number of memlist * available, we should return error. */ max_elms = drmach_setup_memlist(prog); if (max_elms < mlist_size) { err = drerr_new(1, EOPL_FMEM_SETUP, NULL); goto err_out; } active_cpus = 0; if (drmach_disable_mcopy) { active_cpus = 1; CPUSET_ADD(prog->data->cpu_copy_set, cpuid); } else { int max_cpu_num; /* * The parallel copy procedure is going to split some * of the elements of the original memory copy list. * The number of added elements can be up to * (max_cpu_num - 1). It means that max_cpu_num * should satisfy the following condition: * (max_cpu_num - 1) + mlist_size <= max_elms. */ max_cpu_num = max_elms - mlist_size + 1; for (i = 0; i < NCPU; i++) { if (CPU_IN_SET(cpu_ready_set, i) && CPU_ACTIVE(cpu[i])) { /* * To reduce the level-2 cache contention only * one strand per core will participate * in the copy. If the strand with even cpu_id * number is present in the ready set, we will * include this strand in the copy set. If it * is not present in the ready set, we check for * the strand with the consecutive odd cpu_id * and include it, provided that it is * present in the ready set. 
*/ if (!(i & 0x1) || !CPU_IN_SET(prog->data->cpu_copy_set, i - 1)) { CPUSET_ADD(prog->data->cpu_copy_set, i); active_cpus++; /* * We cannot have more than * max_cpu_num CPUs in the copy * set, because each CPU has to * have at least one element * long memory copy list. */ if (active_cpus >= max_cpu_num) break; } } } } x_ml = c_ml; sz = 0; while (x_ml != NULL) { sz += x_ml->size; x_ml = x_ml->next; } copy_sz = sz/active_cpus; copy_sz = roundup(copy_sz, MMU_PAGESIZE4M); while (sz > copy_sz*active_cpus) { copy_sz += MMU_PAGESIZE4M; } prog->data->stick_freq = system_clock_freq; prog->data->copy_delay = ((copy_sz / min_copy_size_per_sec) + 2) * system_clock_freq; x_ml = c_ml; c_addr = x_ml->address; c_size = x_ml->size; for (i = 0; i < NCPU; i++) { prog->stat->nbytes[i] = 0; if (!CPU_IN_SET(prog->data->cpu_copy_set, i)) { continue; } sz = copy_sz; while (sz) { if (c_size > sz) { if ((prog->data->cpu_ml[i] = drmach_memlist_add_span(prog, prog->data->cpu_ml[i], c_addr, sz)) == NULL) { cmn_err(CE_WARN, "Unexpected drmach_memlist_add_span" " failure."); err = drerr_new(1, EOPL_FMEM_SETUP, NULL); mc_resume(); goto out; } c_addr += sz; c_size -= sz; break; } else { sz -= c_size; if ((prog->data->cpu_ml[i] = drmach_memlist_add_span(prog, prog->data->cpu_ml[i], c_addr, c_size)) == NULL) { cmn_err(CE_WARN, "Unexpected drmach_memlist_add_span" " failure."); err = drerr_new(1, EOPL_FMEM_SETUP, NULL); mc_resume(); goto out; } x_ml = x_ml->next; if (x_ml != NULL) { c_addr = x_ml->address; c_size = x_ml->size; } else { goto end; } } } } end: prog->data->s_copybasepa = s_copybasepa; prog->data->t_copybasepa = t_copybasepa; prog->data->c_ml = c_ml; *pgm_id = prog_kmem; /* Unmap the alternate space. 
It will have to be remapped again */ drmach_unlock_critical((caddr_t)prog); return (NULL); err_out: mc_resume(); rv = (*prog->data->scf_fmem_cancel)(); if (rv) { cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x", rv); } out: if (prog != NULL) { drmach_unlock_critical((caddr_t)prog); vmem_free(heap_arena, prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE); } if (prog_kmem != NULL) { kmem_free(prog_kmem, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE); } return (err); } sbd_error_t * drmach_copy_rename_fini(drmachid_t id) { drmach_copy_rename_program_t *prog = id; sbd_error_t *err = NULL; int rv; uint_t fmem_error; /* * Note that we have to delay calling SCF to find out the * status of the FMEM operation here because SCF cannot * respond while it is suspended. * This create a small window when we are sure about the * base address of the system board. * If there is any call to mc-opl to get memory unum, * mc-opl will return UNKNOWN as the unum. */ /* * we have to remap again because all the pointer like data, * critical in prog are based on the alternate vmem space. */ (void) drmach_lock_critical((caddr_t)prog, (caddr_t)prog->locked_prog); if (prog->data->c_ml != NULL) memlist_delete(prog->data->c_ml); if ((prog->data->fmem_status.op & (OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) != (OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) { cmn_err(CE_PANIC, "drmach_copy_rename_fini: invalid op " "code %x\n", prog->data->fmem_status.op); } fmem_error = prog->data->fmem_status.error; if (fmem_error != ESBD_NOERROR) { err = drerr_new(1, fmem_error, NULL); } /* possible ops are SCF_START, MC_SUSPEND */ if (prog->critical->fmem_issued) { if (fmem_error != ESBD_NOERROR) { cmn_err(CE_PANIC, "Irrecoverable FMEM error %d\n", fmem_error); } rv = (*prog->data->scf_fmem_end)(); if (rv) { cmn_err(CE_PANIC, "scf_fmem_end() failed rv=%d", rv); } /* * If we get here, rename is successful. * Do all the copy rename post processing. 
*/ drmach_swap_pa((drmach_mem_t *)prog->data->s_mem, (drmach_mem_t *)prog->data->t_mem); } else { rv = (*prog->data->scf_fmem_cancel)(); if (rv) { cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x", rv); if (!err) { err = drerr_new(1, EOPL_SCF_FMEM_CANCEL, "scf_fmem_cancel() failed. rv = 0x%x", rv); } } } /* soft resume mac patrol */ (*prog->data->mc_resume)(); drmach_unlock_critical((caddr_t)prog->locked_prog); vmem_free(heap_arena, prog->locked_prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE); kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE); return (err); } /*ARGSUSED*/ static void drmach_copy_rename_slave(struct regs *rp, drmachid_t id) { drmach_copy_rename_program_t *prog = (drmach_copy_rename_program_t *)id; register int cpuid; extern void drmach_flush(); extern void membar_sync_il(); extern void drmach_flush_icache(); on_trap_data_t otd; cpuid = CPU->cpu_id; if (on_trap(&otd, OT_DATA_EC)) { no_trap(); prog->data->error[cpuid] = EOPL_FMEM_COPY_ERROR; prog->critical->stat[cpuid] = FMEM_LOOP_EXIT; drmach_flush_icache(); membar_sync_il(); return; } /* * jmp drmach_copy_rename_prog(). 
*/ drmach_flush(prog->critical, PAGESIZE); (void) prog->critical->run(prog, cpuid); drmach_flush_icache(); no_trap(); prog->critical->stat[cpuid] = FMEM_LOOP_EXIT; membar_sync_il(); } static void drmach_swap_pa(drmach_mem_t *s_mem, drmach_mem_t *t_mem) { uint64_t s_base, t_base; drmach_board_t *s_board, *t_board; struct memlist *ml; s_board = s_mem->dev.bp; t_board = t_mem->dev.bp; if (s_board == NULL || t_board == NULL) { cmn_err(CE_PANIC, "Cannot locate source or target board\n"); return; } s_base = s_mem->slice_base; t_base = t_mem->slice_base; s_mem->slice_base = t_base; s_mem->base_pa = (s_mem->base_pa - s_base) + t_base; for (ml = s_mem->memlist; ml; ml = ml->next) { ml->address = ml->address - s_base + t_base; } t_mem->slice_base = s_base; t_mem->base_pa = (t_mem->base_pa - t_base) + s_base; for (ml = t_mem->memlist; ml; ml = ml->next) { ml->address = ml->address - t_base + s_base; } /* * IKP has to update the sb-mem-ranges for mac patrol driver * when it resumes, it will re-read the sb-mem-range property * to get the new base address */ if (oplcfg_pa_swap(s_board->bnum, t_board->bnum) != 0) cmn_err(CE_PANIC, "Could not update device nodes\n"); } void drmach_copy_rename(drmachid_t id) { drmach_copy_rename_program_t *prog_kmem = id; drmach_copy_rename_program_t *prog; cpuset_t cpuset; int cpuid; uint64_t inst; register int rtn; extern int in_sync; int old_in_sync; extern void drmach_sys_trap(); extern void drmach_flush(); extern void drmach_flush_icache(); extern uint64_t patch_inst(uint64_t *, uint64_t); on_trap_data_t otd; prog = prog_kmem->locked_prog; /* * We must immediately drop in the TLB because all pointers * are based on the alternate vmem space. */ (void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog); /* * we call scf to get the base address here becuase if scf * has not been suspended yet, the active path can be changing and * sometimes it is not even mapped. We call the interface when * the OS has been quiesced. 
*/ prog->critical->scf_reg_base = (*prog->data->scf_get_base_addr)(); if (prog->critical->scf_reg_base == (uint64_t)-1 || prog->critical->scf_reg_base == NULL) { prog->data->fmem_status.error = EOPL_FMEM_SCF_ERR; drmach_unlock_critical((caddr_t)prog); return; } cpuset = prog->data->cpu_ready_set; for (cpuid = 0; cpuid < NCPU; cpuid++) { if (CPU_IN_SET(cpuset, cpuid)) { prog->critical->stat[cpuid] = FMEM_LOOP_START; prog->data->error[cpuid] = ESBD_NOERROR; } } old_in_sync = in_sync; in_sync = 1; cpuid = CPU->cpu_id; CPUSET_DEL(cpuset, cpuid); for (cpuid = 0; cpuid < NCPU; cpuid++) { if (CPU_IN_SET(cpuset, cpuid)) { xc_one(cpuid, (xcfunc_t *)drmach_lock_critical, (uint64_t)prog_kmem, (uint64_t)prog); } } cpuid = CPU->cpu_id; xt_some(cpuset, (xcfunc_t *)drmach_sys_trap, (uint64_t)drmach_copy_rename_slave, (uint64_t)prog); xt_sync(cpuset); if (on_trap(&otd, OT_DATA_EC)) { rtn = EOPL_FMEM_COPY_ERROR; drmach_flush_icache(); goto done; } /* * jmp drmach_copy_rename_prog(). */ drmach_flush(prog->critical, PAGESIZE); rtn = prog->critical->run(prog, cpuid); drmach_flush_icache(); done: no_trap(); if (rtn == EOPL_FMEM_HW_ERROR) { kpreempt_enable(); prom_panic("URGENT_ERROR_TRAP is detected during FMEM.\n"); } /* * In normal case, all slave CPU's are still spinning in * the assembly code. The master has to patch the instruction * to get them out. * In error case, e.g. COPY_ERROR, some slave CPU's might * have aborted and already returned and sset LOOP_EXIT status. * Some CPU might still be copying. * In any case, some delay is necessary to give them * enough time to set the LOOP_EXIT status. */ for (;;) { inst = patch_inst((uint64_t *)prog->critical->loop_rtn, prog->critical->inst_loop_ret); if (prog->critical->inst_loop_ret == inst) { break; } } for (cpuid = 0; cpuid < NCPU; cpuid++) { uint64_t last, now; if (!CPU_IN_SET(cpuset, cpuid)) { continue; } last = prog->stat->nbytes[cpuid]; /* * Wait for all CPU to exit. 
* However we do not want an infinite loop * so we detect hangup situation here. * If the slave CPU is still copying data, * we will continue to wait. * In error cases, the master has already set * fmem_status.error to abort the copying. * 1 m.s delay for them to abort copying and * return to drmach_copy_rename_slave to set * FMEM_LOOP_EXIT status should be enough. */ for (;;) { if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT) break; drmach_sleep_il(); drv_usecwait(1000); now = prog->stat->nbytes[cpuid]; if (now <= last) { drv_usecwait(1000); if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT) break; cmn_err(CE_PANIC, "CPU %d hang during Copy " "Rename", cpuid); } last = now; } if (prog->data->error[cpuid] == EOPL_FMEM_HW_ERROR) { prom_panic("URGENT_ERROR_TRAP is detected during " "FMEM.\n"); } } /* * This must be done after all strands have exit. * Removing the TLB entry will affect both strands * in the same core. */ for (cpuid = 0; cpuid < NCPU; cpuid++) { if (CPU_IN_SET(cpuset, cpuid)) { xc_one(cpuid, (xcfunc_t *)drmach_unlock_critical, (uint64_t)prog, 0); } } in_sync = old_in_sync; /* * we should unlock before the following lock to keep the kpreempt * count correct. */ (void) drmach_unlock_critical((caddr_t)prog); /* * we must remap again. TLB might have been removed in above xcall. */ (void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog); if (prog->data->fmem_status.error == ESBD_NOERROR) prog->data->fmem_status.error = rtn; if (prog->data->copy_wait_time > 0) { DRMACH_PR("Unexpected long wait time %ld seconds " "during copy rename on CPU %d\n", prog->data->copy_wait_time/prog->data->stick_freq, prog->data->slowest_cpuid); } drmach_unlock_critical((caddr_t)prog); }