1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2007 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/conf.h> 34 #include <sys/modctl.h> 35 #include <sys/stat.h> 36 #include <sys/async.h> 37 #include <sys/machcpuvar.h> 38 #include <sys/machsystm.h> 39 #include <sys/promif.h> 40 #include <sys/ksynch.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/sunndi.h> 44 #include <sys/ddifm.h> 45 #include <sys/fm/protocol.h> 46 #include <sys/fm/util.h> 47 #include <sys/kmem.h> 48 #include <sys/fm/io/opl_mc_fm.h> 49 #include <sys/memlist.h> 50 #include <sys/param.h> 51 #include <sys/disp.h> 52 #include <vm/page.h> 53 #include <sys/mc-opl.h> 54 #include <sys/opl.h> 55 #include <sys/opl_dimm.h> 56 #include <sys/scfd/scfostoescf.h> 57 #include <sys/cpu_module.h> 58 #include <vm/seg_kmem.h> 59 #include <sys/vmem.h> 60 #include <vm/hat_sfmmu.h> 61 #include <sys/vmsystm.h> 62 #include <sys/membar.h> 63 64 /* 65 * Function 
prototypes
 */

/*
 * Character-device and autoconfiguration entry points.  These are the
 * functions wired into mc_cb_ops and mc_ops below.
 */
static int mc_open(dev_t *, int, int, cred_t *);
static int mc_close(dev_t, int, int, cred_t *);
static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
static int mc_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * Polling-thread setup/teardown, per-board add/delete and
 * suspend/resume helpers.
 */
static int mc_poll_init(void);
static void mc_poll_fini(void);
static int mc_board_add(mc_opl_t *mcp);
static int mc_board_del(mc_opl_t *mcp);
static int mc_suspend(mc_opl_t *mcp, uint32_t flag);
static int mc_resume(mc_opl_t *mcp, uint32_t flag);
int opl_mc_suspend(void);
int opl_mc_resume(void);

static void insert_mcp(mc_opl_t *mcp);
static void delete_mcp(mc_opl_t *mcp);

/* Physical address -> MAC address translation and range check. */
static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr);

static int mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa);

/*
 * Memory naming services.  The first four are installed into the
 * opl_get_mem_* hook pointers by _init().
 */
int mc_get_mem_unum(int, uint64_t, char *, int, int *);
int mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr);
int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
int mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp);
int mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
    int buflen, int *lenp);
mc_dimm_info_t *mc_get_dimm_list(mc_opl_t *mcp);
mc_dimm_info_t *mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp);
int mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, int bank,
    uint32_t mf_type, uint32_t d_slot);
static void mc_free_dimm_list(mc_dimm_info_t *d);
static void mc_get_mlist(mc_opl_t *);
static void mc_polling(void);
static int mc_opl_get_physical_board(int);

static void mc_clear_rewrite(mc_opl_t *mcp, int i);
static void mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state);

#ifdef DEBUG
/* Debug-only helpers; mc_ioctl() forwards to mc_ioctl_debug() under DEBUG. */
static int mc_ioctl_debug(dev_t, int, intptr_t, int, cred_t *, int *);
void mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz);
void mc_dump_dimm_info(board_dimm_info_t *bd_dimmp);
#endif

/* opl_get_physical_board is a weak symbol and may be absent at run time. */
#pragma weak opl_get_physical_board
extern int opl_get_physical_board(int);
extern int plat_max_boards(void);

/*
 * Configuration data structures
 */

/*
 * Character/block entry points.  Only open, close and ioctl are
 * implemented by this driver; every other slot is nulldev/nodev.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};

/* Device operations: attach/detach plus the cb_ops table above. */
static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt */
	ddi_getinfo_1to1,		/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev				/* power */
};

/*
 * Driver globals
 */

/*
 * Platform model.  _init() overrides the default from the PROM root
 * node "model" property ("FF1", "FF2", or anything starting with "DC").
 * The enumerator values double as indices into model_names[].
 */
static enum {
	MODEL_FF1 = 0,
	MODEL_FF2 = 1,
	MODEL_DC = 2
} plat_model = MODEL_DC;	/* The default behaviour is DC */

/*
 * Per-model name fragments used when building a unum string; indexed
 * by plat_model (see mc_set_mem_unum()).
 */
static struct plat_model_names {
	const char *unit_name;	/* board/unit component of the unum */
	const char *mem_name;	/* memory-board component (empty for DC) */
} model_names[] = {
	{ "MBU_A", "MEMB" },	/* MODEL_FF1 */
	{ "MBU_B", "MEMB" },	/* MODEL_FF2 */
	{ "CMU", "" }		/* MODEL_DC */
};

/*
 * The DIMM Names for DC platform.
 * The index into this table is made up of (bank, dslot),
 * Where dslot occupies bits 0-1 and bank occupies 2-4.
178 */ 179 static char *mc_dc_dimm_unum_table[OPL_MAX_DIMMS] = { 180 /* --------CMUnn----------- */ 181 /* --CS0-----|--CS1------ */ 182 /* -H-|--L-- | -H- | -L-- */ 183 "03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */ 184 "13A", "12A", "13B", "12B", /* Bank 1 (MAC 0 bank 1) */ 185 "23A", "22A", "23B", "22B", /* Bank 2 (MAC 1 bank 0) */ 186 "33A", "32A", "33B", "32B", /* Bank 3 (MAC 1 bank 1) */ 187 "01A", "00A", "01B", "00B", /* Bank 4 (MAC 2 bank 0) */ 188 "11A", "10A", "11B", "10B", /* Bank 5 (MAC 2 bank 1) */ 189 "21A", "20A", "21B", "20B", /* Bank 6 (MAC 3 bank 0) */ 190 "31A", "30A", "31B", "30B" /* Bank 7 (MAC 3 bank 1) */ 191 }; 192 193 /* 194 * The DIMM Names for FF1/FF2 platforms. 195 * The index into this table is made up of (board, bank, dslot), 196 * Where dslot occupies bits 0-1, bank occupies 2-4 and 197 * board occupies the bit 5. 198 */ 199 static char *mc_ff_dimm_unum_table[2 * OPL_MAX_DIMMS] = { 200 /* --------CMU0---------- */ 201 /* --CS0-----|--CS1------ */ 202 /* -H-|--L-- | -H- | -L-- */ 203 "03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */ 204 "01A", "00A", "01B", "00B", /* Bank 1 (MAC 0 bank 1) */ 205 "13A", "12A", "13B", "12B", /* Bank 2 (MAC 1 bank 0) */ 206 "11A", "10A", "11B", "10B", /* Bank 3 (MAC 1 bank 1) */ 207 "23A", "22A", "23B", "22B", /* Bank 4 (MAC 2 bank 0) */ 208 "21A", "20A", "21B", "20B", /* Bank 5 (MAC 2 bank 1) */ 209 "33A", "32A", "33B", "32B", /* Bank 6 (MAC 3 bank 0) */ 210 "31A", "30A", "31B", "30B", /* Bank 7 (MAC 3 bank 1) */ 211 /* --------CMU1---------- */ 212 /* --CS0-----|--CS1------ */ 213 /* -H-|--L-- | -H- | -L-- */ 214 "43A", "42A", "43B", "42B", /* Bank 0 (MAC 0 bank 0) */ 215 "41A", "40A", "41B", "40B", /* Bank 1 (MAC 0 bank 1) */ 216 "53A", "52A", "53B", "52B", /* Bank 2 (MAC 1 bank 0) */ 217 "51A", "50A", "51B", "50B", /* Bank 3 (MAC 1 bank 1) */ 218 "63A", "62A", "63B", "62B", /* Bank 4 (MAC 2 bank 0) */ 219 "61A", "60A", "61B", "60B", /* Bank 5 (MAC 2 bank 1) */ 220 "73A", "72A", "73B", 
"72B", /* Bank 6 (MAC 3 bank 0) */ 221 "71A", "70A", "71B", "70B" /* Bank 7 (MAC 3 bank 1) */ 222 }; 223 224 #define BD_BK_SLOT_TO_INDEX(bd, bk, s) \ 225 (((bd & 0x01) << 5) | ((bk & 0x07) << 2) | (s & 0x03)) 226 227 #define INDEX_TO_BANK(i) (((i) & 0x1C) >> 2) 228 #define INDEX_TO_SLOT(i) ((i) & 0x03) 229 230 #define SLOT_TO_CS(slot) ((slot & 0x3) >> 1) 231 232 /* Isolation unit size is 64 MB */ 233 #define MC_ISOLATION_BSIZE (64 * 1024 * 1024) 234 235 #define MC_MAX_SPEEDS 7 236 237 typedef struct { 238 uint32_t mc_speeds; 239 uint32_t mc_period; 240 } mc_scan_speed_t; 241 242 #define MC_CNTL_SPEED_SHIFT 26 243 244 /* 245 * In mirror mode, we normalized the bank idx to "even" since 246 * the HW treats them as one unit w.r.t programming. 247 * This bank index will be the "effective" bank index. 248 * All mirrored bank state info on mc_period, mc_speedup_period 249 * will be stored in the even bank structure to avoid code duplication. 250 */ 251 #define MIRROR_IDX(bankidx) (bankidx & ~1) 252 253 static mc_scan_speed_t mc_scan_speeds[MC_MAX_SPEEDS] = { 254 {0x6 << MC_CNTL_SPEED_SHIFT, 0}, 255 {0x5 << MC_CNTL_SPEED_SHIFT, 32}, 256 {0x4 << MC_CNTL_SPEED_SHIFT, 64}, 257 {0x3 << MC_CNTL_SPEED_SHIFT, 128}, 258 {0x2 << MC_CNTL_SPEED_SHIFT, 256}, 259 {0x1 << MC_CNTL_SPEED_SHIFT, 512}, 260 {0x0 << MC_CNTL_SPEED_SHIFT, 1024} 261 }; 262 263 static uint32_t mc_max_speed = (0x6 << 26); 264 265 int mc_isolation_bsize = MC_ISOLATION_BSIZE; 266 int mc_patrol_interval_sec = MC_PATROL_INTERVAL_SEC; 267 int mc_max_scf_retry = 16; 268 int mc_max_scf_logs = 64; 269 int mc_max_errlog_processed = BANKNUM_PER_SB*2; 270 int mc_scan_period = 12 * 60 * 60; /* 12 hours period */ 271 int mc_max_rewrite_loop = 100; 272 int mc_rewrite_delay = 10; 273 /* 274 * it takes SCF about 300 m.s. to process a requst. We can bail out 275 * if it is busy. It does not pay to wait for it too long. 
*/
int mc_max_scf_loop = 2;
int mc_scf_delay = 100;
int mc_pce_dropped = 0;
/* Priority passed to thread_create() for the polling thread. */
int mc_poll_priority = MINCLSYSPRI;
int mc_max_rewrite_retry = 6 * 60;


/*
 * Mutex hierarchy in mc-opl
 * If both mcmutex and mc_lock must be held,
 * mcmutex must be acquired first, and then mc_lock.
 */

static kmutex_t mcmutex;
mc_opl_t *mc_instances[OPL_MAX_BOARDS];

/*
 * Polling thread control.  mc_poll_cmd and mc_pollthr_running are read
 * and written with mc_polling_lock held.  mc_polling_cv wakes the
 * polling thread; mc_poll_exit_cv is signalled by the thread on exit.
 */
static kmutex_t mc_polling_lock;
static kcondvar_t mc_polling_cv;
static kcondvar_t mc_poll_exit_cv;
static int mc_poll_cmd = 0;
static int mc_pollthr_running = 0;
/*
 * Polling period.  mc_attach() sets this from drv_usectohz() and the
 * polling thread adds it to ddi_get_lbolt(), so the unit is clock ticks.
 */
int mc_timeout_period = 0;
/* Soft-state anchor; one mc_opl_t per instance. */
void *mc_statep;

#ifdef DEBUG
int oplmc_debug = 0;
#endif

static int mc_debug_show_all = 0;

extern struct mod_ops mod_driverops;

static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"OPL Memory-controller %I%",	/* module name */
	&mc_ops,			/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};

/*
 * Weakly bound hook pointers.  _init() points each one that is present
 * at the matching mc_get_mem_*() routine; _fini() resets them to NULL.
 */
#pragma weak opl_get_mem_unum
#pragma weak opl_get_mem_sid
#pragma weak opl_get_mem_offset
#pragma weak opl_get_mem_addr

extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
extern int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
extern int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
extern int (*opl_get_mem_addr)(char *unum, char *sid, uint64_t offset,
    uint64_t *paddr);


/*
 * pseudo-mc node portid format
 *
 *		[10]   = 0
 *		[9]    = 1
 *		[8]    = LSB_ID[4] = 0
 *		[7:4]  = LSB_ID[3:0]
 *		[3:0]  = 0
 *
 */

/*
 * These are the module initialization routines.
346 */ 347 int 348 _init(void) 349 { 350 int error; 351 int plen; 352 char model[20]; 353 pnode_t node; 354 355 356 if ((error = ddi_soft_state_init(&mc_statep, 357 sizeof (mc_opl_t), 1)) != 0) 358 return (error); 359 360 if ((error = mc_poll_init()) != 0) { 361 ddi_soft_state_fini(&mc_statep); 362 return (error); 363 } 364 365 mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL); 366 if (&opl_get_mem_unum) 367 opl_get_mem_unum = mc_get_mem_unum; 368 if (&opl_get_mem_sid) 369 opl_get_mem_sid = mc_get_mem_sid; 370 if (&opl_get_mem_offset) 371 opl_get_mem_offset = mc_get_mem_offset; 372 if (&opl_get_mem_addr) 373 opl_get_mem_addr = mc_get_mem_addr; 374 375 node = prom_rootnode(); 376 plen = prom_getproplen(node, "model"); 377 378 if (plen > 0 && plen < sizeof (model)) { 379 (void) prom_getprop(node, "model", model); 380 model[plen] = '\0'; 381 if (strcmp(model, "FF1") == 0) 382 plat_model = MODEL_FF1; 383 else if (strcmp(model, "FF2") == 0) 384 plat_model = MODEL_FF2; 385 else if (strncmp(model, "DC", 2) == 0) 386 plat_model = MODEL_DC; 387 } 388 389 error = mod_install(&modlinkage); 390 if (error != 0) { 391 if (&opl_get_mem_unum) 392 opl_get_mem_unum = NULL; 393 if (&opl_get_mem_sid) 394 opl_get_mem_sid = NULL; 395 if (&opl_get_mem_offset) 396 opl_get_mem_offset = NULL; 397 if (&opl_get_mem_addr) 398 opl_get_mem_addr = NULL; 399 mutex_destroy(&mcmutex); 400 mc_poll_fini(); 401 ddi_soft_state_fini(&mc_statep); 402 } 403 return (error); 404 } 405 406 int 407 _fini(void) 408 { 409 int error; 410 411 if ((error = mod_remove(&modlinkage)) != 0) 412 return (error); 413 414 if (&opl_get_mem_unum) 415 opl_get_mem_unum = NULL; 416 if (&opl_get_mem_sid) 417 opl_get_mem_sid = NULL; 418 if (&opl_get_mem_offset) 419 opl_get_mem_offset = NULL; 420 if (&opl_get_mem_addr) 421 opl_get_mem_addr = NULL; 422 423 mutex_destroy(&mcmutex); 424 mc_poll_fini(); 425 ddi_soft_state_fini(&mc_statep); 426 427 return (0); 428 } 429 430 int 431 _info(struct modinfo *modinfop) 432 { 433 return 
(mod_info(&modlinkage, modinfop)); 434 } 435 436 static void 437 mc_polling_thread() 438 { 439 mutex_enter(&mc_polling_lock); 440 mc_pollthr_running = 1; 441 while (!(mc_poll_cmd & MC_POLL_EXIT)) { 442 mc_polling(); 443 cv_timedwait(&mc_polling_cv, &mc_polling_lock, 444 ddi_get_lbolt() + mc_timeout_period); 445 } 446 mc_pollthr_running = 0; 447 448 /* 449 * signal if any one is waiting for this thread to exit. 450 */ 451 cv_signal(&mc_poll_exit_cv); 452 mutex_exit(&mc_polling_lock); 453 thread_exit(); 454 /* NOTREACHED */ 455 } 456 457 static int 458 mc_poll_init() 459 { 460 mutex_init(&mc_polling_lock, NULL, MUTEX_DRIVER, NULL); 461 cv_init(&mc_polling_cv, NULL, CV_DRIVER, NULL); 462 cv_init(&mc_poll_exit_cv, NULL, CV_DRIVER, NULL); 463 return (0); 464 } 465 466 static void 467 mc_poll_fini() 468 { 469 mutex_enter(&mc_polling_lock); 470 if (mc_pollthr_running) { 471 mc_poll_cmd = MC_POLL_EXIT; 472 cv_signal(&mc_polling_cv); 473 while (mc_pollthr_running) { 474 cv_wait(&mc_poll_exit_cv, &mc_polling_lock); 475 } 476 } 477 mutex_exit(&mc_polling_lock); 478 mutex_destroy(&mc_polling_lock); 479 cv_destroy(&mc_polling_cv); 480 cv_destroy(&mc_poll_exit_cv); 481 } 482 483 static int 484 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 485 { 486 mc_opl_t *mcp; 487 int instance; 488 int rv; 489 490 /* get the instance of this devi */ 491 instance = ddi_get_instance(devi); 492 493 switch (cmd) { 494 case DDI_ATTACH: 495 break; 496 case DDI_RESUME: 497 mcp = ddi_get_soft_state(mc_statep, instance); 498 rv = mc_resume(mcp, MC_DRIVER_SUSPENDED); 499 return (rv); 500 default: 501 return (DDI_FAILURE); 502 } 503 504 if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS) 505 return (DDI_FAILURE); 506 507 if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) { 508 goto bad; 509 } 510 511 if (mc_timeout_period == 0) { 512 mc_patrol_interval_sec = (int)ddi_getprop(DDI_DEV_T_ANY, devi, 513 DDI_PROP_DONTPASS, "mc-timeout-interval-sec", 514 mc_patrol_interval_sec); 515 
mc_timeout_period = drv_usectohz(1000000 * 516 mc_patrol_interval_sec / OPL_MAX_BOARDS); 517 } 518 519 /* set informations in mc state */ 520 mcp->mc_dip = devi; 521 522 if (mc_board_add(mcp)) 523 goto bad; 524 525 insert_mcp(mcp); 526 527 /* 528 * Start the polling thread if it is not running already. 529 */ 530 mutex_enter(&mc_polling_lock); 531 if (!mc_pollthr_running) { 532 (void) thread_create(NULL, 0, (void (*)())mc_polling_thread, 533 NULL, 0, &p0, TS_RUN, mc_poll_priority); 534 } 535 mutex_exit(&mc_polling_lock); 536 ddi_report_dev(devi); 537 538 return (DDI_SUCCESS); 539 540 bad: 541 ddi_soft_state_free(mc_statep, instance); 542 return (DDI_FAILURE); 543 } 544 545 /* ARGSUSED */ 546 static int 547 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 548 { 549 int rv; 550 int instance; 551 mc_opl_t *mcp; 552 553 /* get the instance of this devi */ 554 instance = ddi_get_instance(devi); 555 if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) { 556 return (DDI_FAILURE); 557 } 558 559 switch (cmd) { 560 case DDI_SUSPEND: 561 rv = mc_suspend(mcp, MC_DRIVER_SUSPENDED); 562 return (rv); 563 case DDI_DETACH: 564 break; 565 default: 566 return (DDI_FAILURE); 567 } 568 569 delete_mcp(mcp); 570 if (mc_board_del(mcp) != DDI_SUCCESS) { 571 return (DDI_FAILURE); 572 } 573 574 /* free up the soft state */ 575 ddi_soft_state_free(mc_statep, instance); 576 577 return (DDI_SUCCESS); 578 } 579 580 /* ARGSUSED */ 581 static int 582 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp) 583 { 584 return (0); 585 } 586 587 /* ARGSUSED */ 588 static int 589 mc_close(dev_t devp, int flag, int otyp, cred_t *credp) 590 { 591 return (0); 592 } 593 594 /* ARGSUSED */ 595 static int 596 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 597 int *rvalp) 598 { 599 #ifdef DEBUG 600 return (mc_ioctl_debug(dev, cmd, arg, mode, credp, rvalp)); 601 #else 602 return (ENXIO); 603 #endif 604 } 605 606 /* 607 * PA validity check: 608 * This function return 1 if the PA is 
a valid PA 609 * in the running Solaris instance i.e. in physinstall 610 * Otherwise, return 0. 611 */ 612 613 /* ARGSUSED */ 614 static int 615 pa_is_valid(mc_opl_t *mcp, uint64_t addr) 616 { 617 if (mcp->mlist == NULL) 618 mc_get_mlist(mcp); 619 620 if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) { 621 return (1); 622 } 623 return (0); 624 } 625 626 /* 627 * mac-pa translation routines. 628 * 629 * Input: mc driver state, (LSB#, Bank#, DIMM address) 630 * Output: physical address 631 * 632 * Valid - return value: 0 633 * Invalid - return value: -1 634 */ 635 static int 636 mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa) 637 { 638 int i; 639 uint64_t pa_offset = 0; 640 int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1; 641 int bank = maddr->ma_bank; 642 mc_addr_t maddr1; 643 int bank0, bank1; 644 645 MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank, 646 maddr->ma_dimm_addr); 647 648 /* loc validity check */ 649 ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd); 650 ASSERT(bank >= 0 && OPL_BANK_MAX > bank); 651 652 /* Do translation */ 653 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 654 int pa_bit = 0; 655 int mc_bit = mcp->mc_trans_table[cs][i]; 656 if (mc_bit < MC_ADDRESS_BITS) { 657 pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1; 658 } else if (mc_bit == MP_NONE) { 659 pa_bit = 0; 660 } else if (mc_bit == MP_BANK_0) { 661 pa_bit = bank & 1; 662 } else if (mc_bit == MP_BANK_1) { 663 pa_bit = (bank >> 1) & 1; 664 } else if (mc_bit == MP_BANK_2) { 665 pa_bit = (bank >> 2) & 1; 666 } 667 pa_offset |= ((uint64_t)pa_bit) << i; 668 } 669 *pa = mcp->mc_start_address + pa_offset; 670 MC_LOG("pa = %lx\n", *pa); 671 672 if (pa_to_maddr(mcp, *pa, &maddr1) == -1) { 673 cmn_err(CE_WARN, "mcaddr_to_pa: /LSB%d/B%d/%x failed to " 674 "convert PA %lx\n", maddr->ma_bd, bank, 675 maddr->ma_dimm_addr, *pa); 676 return (-1); 677 } 678 679 /* 680 * In mirror mode, PA is always translated to the even bank. 
681 */ 682 if (IS_MIRROR(mcp, maddr->ma_bank)) { 683 bank0 = maddr->ma_bank & ~(1); 684 bank1 = maddr1.ma_bank & ~(1); 685 } else { 686 bank0 = maddr->ma_bank; 687 bank1 = maddr1.ma_bank; 688 } 689 /* 690 * there is no need to check ma_bd because it is generated from 691 * mcp. They are the same. 692 */ 693 if ((bank0 == bank1) && (maddr->ma_dimm_addr == 694 maddr1.ma_dimm_addr)) { 695 return (0); 696 } else { 697 cmn_err(CE_WARN, "Translation error source /LSB%d/B%d/%x, " 698 "PA %lx, target /LSB%d/B%d/%x\n", maddr->ma_bd, bank, 699 maddr->ma_dimm_addr, *pa, maddr1.ma_bd, maddr1.ma_bank, 700 maddr1.ma_dimm_addr); 701 return (-1); 702 } 703 } 704 705 /* 706 * PA to CS (used by pa_to_maddr). 707 */ 708 static int 709 pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset) 710 { 711 int i; 712 int cs = 1; 713 714 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 715 /* MAC address bit<29> is arranged on the same PA bit */ 716 /* on both table. So we may use any table. */ 717 if (mcp->mc_trans_table[0][i] == CS_SHIFT) { 718 cs = (pa_offset >> i) & 1; 719 break; 720 } 721 } 722 return (cs); 723 } 724 725 /* 726 * PA to DIMM (used by pa_to_maddr). 727 */ 728 /* ARGSUSED */ 729 static uint32_t 730 pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset) 731 { 732 int i; 733 int cs = pa_to_cs(mcp, pa_offset); 734 uint32_t dimm_addr = 0; 735 736 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 737 int pa_bit_value = (pa_offset >> i) & 1; 738 int mc_bit = mcp->mc_trans_table[cs][i]; 739 if (mc_bit < MC_ADDRESS_BITS) { 740 dimm_addr |= pa_bit_value << mc_bit; 741 } 742 } 743 dimm_addr |= cs << CS_SHIFT; 744 return (dimm_addr); 745 } 746 747 /* 748 * PA to Bank (used by pa_to_maddr). 
749 */ 750 static int 751 pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset) 752 { 753 int i; 754 int cs = pa_to_cs(mcp, pa_offset); 755 int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT]; 756 757 758 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 759 int pa_bit_value = (pa_offset >> i) & 1; 760 int mc_bit = mcp->mc_trans_table[cs][i]; 761 switch (mc_bit) { 762 case MP_BANK_0: 763 bankno |= pa_bit_value; 764 break; 765 case MP_BANK_1: 766 bankno |= pa_bit_value << 1; 767 break; 768 case MP_BANK_2: 769 bankno |= pa_bit_value << 2; 770 break; 771 } 772 } 773 774 return (bankno); 775 } 776 777 /* 778 * PA to MAC address translation 779 * 780 * Input: MAC driver state, physicall adress 781 * Output: LSB#, Bank id, mac address 782 * 783 * Valid - return value: 0 784 * Invalid - return value: -1 785 */ 786 787 int 788 pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr) 789 { 790 uint64_t pa_offset; 791 792 if (!mc_rangecheck_pa(mcp, pa)) 793 return (-1); 794 795 /* Do translation */ 796 pa_offset = pa - mcp->mc_start_address; 797 798 maddr->ma_bd = mcp->mc_board_num; 799 maddr->ma_phys_bd = mcp->mc_phys_board_num; 800 maddr->ma_bank = pa_to_bank(mcp, pa_offset); 801 maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset); 802 MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n", pa_offset, maddr->ma_bd, 803 maddr->ma_bank, maddr->ma_dimm_addr); 804 return (0); 805 } 806 807 /* 808 * UNUM format for DC is "/CMUnn/MEMxyZ", where 809 * nn = 00..03 for DC1 and 00..07 for DC2 and 00..15 for DC3. 810 * x = MAC 0..3 811 * y = 0..3 (slot info). 812 * Z = 'A' or 'B' 813 * 814 * UNUM format for FF1 is "/MBU_A/MEMBx/MEMyZ", where 815 * x = 0..3 (MEMB number) 816 * y = 0..3 (slot info). 817 * Z = 'A' or 'B' 818 * 819 * UNUM format for FF2 is "/MBU_B/MEMBx/MEMyZ" 820 * x = 0..7 (MEMB number) 821 * y = 0..3 (slot info). 
822 * Z = 'A' or 'B' 823 */ 824 int 825 mc_set_mem_unum(char *buf, int buflen, int sb, int bank, 826 uint32_t mf_type, uint32_t d_slot) 827 { 828 char *dimmnm; 829 char memb_num; 830 int cs; 831 int i; 832 int j; 833 834 cs = SLOT_TO_CS(d_slot); 835 836 if (plat_model == MODEL_DC) { 837 if (mf_type == FLT_TYPE_INTERMITTENT_CE || 838 mf_type == FLT_TYPE_PERMANENT_CE) { 839 i = BD_BK_SLOT_TO_INDEX(0, bank, d_slot); 840 dimmnm = mc_dc_dimm_unum_table[i]; 841 snprintf(buf, buflen, "/%s%02d/MEM%s", 842 model_names[plat_model].unit_name, sb, dimmnm); 843 } else { 844 i = BD_BK_SLOT_TO_INDEX(0, bank, 0); 845 j = (cs == 0) ? i : i + 2; 846 snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s", 847 model_names[plat_model].unit_name, sb, 848 mc_dc_dimm_unum_table[j], 849 mc_dc_dimm_unum_table[j + 1]); 850 } 851 } else { 852 if (mf_type == FLT_TYPE_INTERMITTENT_CE || 853 mf_type == FLT_TYPE_PERMANENT_CE) { 854 i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 855 dimmnm = mc_ff_dimm_unum_table[i]; 856 memb_num = dimmnm[0]; 857 snprintf(buf, buflen, "/%s/%s%c/MEM%s", 858 model_names[plat_model].unit_name, 859 model_names[plat_model].mem_name, 860 memb_num, &dimmnm[1]); 861 } else { 862 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 863 j = (cs == 0) ? 
i : i + 2; 864 memb_num = mc_ff_dimm_unum_table[i][0], 865 snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s", 866 model_names[plat_model].unit_name, 867 model_names[plat_model].mem_name, memb_num, 868 &mc_ff_dimm_unum_table[j][1], 869 &mc_ff_dimm_unum_table[j + 1][1]); 870 } 871 } 872 return (0); 873 } 874 875 static void 876 mc_ereport_post(mc_aflt_t *mc_aflt) 877 { 878 char buf[FM_MAX_CLASS]; 879 char device_path[MAXPATHLEN]; 880 char sid[MAXPATHLEN]; 881 nv_alloc_t *nva = NULL; 882 nvlist_t *ereport, *detector, *resource; 883 errorq_elem_t *eqep; 884 int nflts; 885 mc_flt_stat_t *flt_stat; 886 int i, n; 887 int blen = MAXPATHLEN; 888 char *p, *s = NULL; 889 uint32_t values[2], synd[2], dslot[2]; 890 uint64_t offset = (uint64_t)-1; 891 int ret = -1; 892 893 if (panicstr) { 894 eqep = errorq_reserve(ereport_errorq); 895 if (eqep == NULL) 896 return; 897 ereport = errorq_elem_nvl(ereport_errorq, eqep); 898 nva = errorq_elem_nva(ereport_errorq, eqep); 899 } else { 900 ereport = fm_nvlist_create(nva); 901 } 902 903 /* 904 * Create the scheme "dev" FMRI. 905 */ 906 detector = fm_nvlist_create(nva); 907 resource = fm_nvlist_create(nva); 908 909 nflts = mc_aflt->mflt_nflts; 910 911 ASSERT(nflts >= 1 && nflts <= 2); 912 913 flt_stat = mc_aflt->mflt_stat[0]; 914 (void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path); 915 (void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL, 916 device_path, NULL); 917 918 /* 919 * Encode all the common data into the ereport. 920 */ 921 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s", MC_OPL_ERROR_CLASS, 922 mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS : MC_OPL_MI_SUBCLASS, 923 mc_aflt->mflt_erpt_class); 924 925 MC_LOG("mc_ereport_post: ereport %s\n", buf); 926 927 928 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 929 fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1), detector, NULL); 930 931 /* 932 * Set payload. 
933 */ 934 fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32, 935 flt_stat->mf_flt_maddr.ma_bd, NULL); 936 937 fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64, 938 flt_stat->mf_flt_paddr, NULL); 939 940 if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE || 941 flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 942 fm_payload_set(ereport, MC_OPL_FLT_TYPE, DATA_TYPE_UINT8, 943 ECC_STICKY, NULL); 944 } 945 946 for (i = 0; i < nflts; i++) 947 values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank; 948 949 fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY, nflts, 950 values, NULL); 951 952 for (i = 0; i < nflts; i++) 953 values[i] = mc_aflt->mflt_stat[i]->mf_cntl; 954 955 fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY, nflts, 956 values, NULL); 957 958 for (i = 0; i < nflts; i++) 959 values[i] = mc_aflt->mflt_stat[i]->mf_err_add; 960 961 /* offset is set only for PCE and ICE */ 962 if (mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_INTERMITTENT_CE || 963 mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_PERMANENT_CE) { 964 offset = values[0]; 965 966 } 967 fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY, nflts, 968 values, NULL); 969 970 for (i = 0; i < nflts; i++) 971 values[i] = mc_aflt->mflt_stat[i]->mf_err_log; 972 973 fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY, nflts, 974 values, NULL); 975 976 for (i = 0; i < nflts; i++) { 977 flt_stat = mc_aflt->mflt_stat[i]; 978 if (flt_stat->mf_errlog_valid) { 979 synd[i] = flt_stat->mf_synd; 980 dslot[i] = flt_stat->mf_dimm_slot; 981 values[i] = flt_stat->mf_dram_place; 982 } else { 983 synd[i] = 0; 984 dslot[i] = 0; 985 values[i] = 0; 986 } 987 } 988 989 fm_payload_set(ereport, MC_OPL_ERR_SYND, DATA_TYPE_UINT32_ARRAY, nflts, 990 synd, NULL); 991 992 fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT, DATA_TYPE_UINT32_ARRAY, 993 nflts, dslot, NULL); 994 995 fm_payload_set(ereport, MC_OPL_ERR_DRAM, DATA_TYPE_UINT32_ARRAY, nflts, 996 values, NULL); 997 998 device_path[0] = 0; 999 p = 
&device_path[0]; 1000 sid[0] = 0; 1001 s = &sid[0]; 1002 ret = 0; 1003 1004 for (i = 0; i < nflts; i++) { 1005 int bank; 1006 1007 flt_stat = mc_aflt->mflt_stat[i]; 1008 bank = flt_stat->mf_flt_maddr.ma_bank; 1009 ret = mc_set_mem_unum(p + strlen(p), blen, 1010 flt_stat->mf_flt_maddr.ma_phys_bd, bank, flt_stat->mf_type, 1011 flt_stat->mf_dimm_slot); 1012 1013 if (ret != 0) { 1014 cmn_err(CE_WARN, 1015 "mc_ereport_post: Failed to determine the unum " 1016 "for board=%d bank=%d type=0x%x slot=0x%x", 1017 flt_stat->mf_flt_maddr.ma_bd, bank, 1018 flt_stat->mf_type, flt_stat->mf_dimm_slot); 1019 continue; 1020 } 1021 n = strlen(device_path); 1022 blen = MAXPATHLEN - n; 1023 p = &device_path[n]; 1024 if (i < (nflts - 1)) { 1025 snprintf(p, blen, " "); 1026 blen--; 1027 p++; 1028 } 1029 1030 if (ret == 0) { 1031 ret = mc_set_mem_sid(mc_aflt->mflt_mcp, s + strlen(s), 1032 blen, flt_stat->mf_flt_maddr.ma_phys_bd, bank, 1033 flt_stat->mf_type, flt_stat->mf_dimm_slot); 1034 1035 } 1036 } 1037 1038 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL, 1039 device_path, (ret == 0) ? sid : NULL, (ret == 0) ? 
offset :
	    (uint64_t)-1);

	fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST, resource,
	    NULL);

	if (panicstr) {
		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
	} else {
		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
		fm_nvlist_destroy(ereport, FM_NVA_FREE);
		fm_nvlist_destroy(detector, FM_NVA_FREE);
		fm_nvlist_destroy(resource, FM_NVA_FREE);
	}
}


/*
 * Drain one fault record: resolve a physical address for every fault in
 * mc_aflt, decide whether the affected page needs to be retired, and post
 * one ereport per fault that resolved to a usable physical address.
 *
 * Each fault's mf_flt_paddr is set to the translated address, or to
 * (uint64_t)-1 when mcaddr_to_pa() fails or pa_is_valid() rejects it.
 * The page retire check below is done on 'pa', i.e. on whatever the last
 * mcaddr_to_pa() call in the loop stored there (or (uint64_t)-1 if it was
 * never written).
 *
 * If the page is already retired or pending retirement (0 / EAGAIN) the
 * function returns without posting any ereport.
 */
static void
mc_err_drain(mc_aflt_t *mc_aflt)
{
	int rv;
	uint64_t pa = (uint64_t)(-1);
	int i;

	MC_LOG("mc_err_drain: %s\n", mc_aflt->mflt_erpt_class);
	/*
	 * we come here only when we have:
	 * In mirror mode: MUE, SUE
	 * In normal mode: UE, Permanent CE, Intermittent CE
	 */
	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
		rv = mcaddr_to_pa(mc_aflt->mflt_mcp,
		    &(mc_aflt->mflt_stat[i]->mf_flt_maddr), &pa);

		/* Ensure the pa is valid (not in isolated memory block) */
		if (rv == 0 && pa_is_valid(mc_aflt->mflt_mcp, pa))
			mc_aflt->mflt_stat[i]->mf_flt_paddr = pa;
		else
			mc_aflt->mflt_stat[i]->mf_flt_paddr = (uint64_t)-1;
	}

	MC_LOG("mc_err_drain:pa = %lx\n", pa);

	switch (page_retire_check(pa, NULL)) {
	case 0:
	case EAGAIN:
		MC_LOG("Page retired or pending\n");
		return;
	case EIO:
		/*
		 * Do page retirement except for the PCE and ICE cases.
		 * Those are taken care of by the OPL DE.
		 * Only the type of the first fault is consulted here.
		 */
		if (mc_aflt->mflt_stat[0]->mf_type !=
		    FLT_TYPE_INTERMITTENT_CE &&
		    mc_aflt->mflt_stat[0]->mf_type != FLT_TYPE_PERMANENT_CE) {
			MC_LOG("offline page at pa %lx error %x\n", pa,
			    mc_aflt->mflt_pr);
			(void) page_retire(pa, mc_aflt->mflt_pr);
		}
		break;
	case EINVAL:
	default:
		/*
		 * Some memory does not have a page structure so
		 * we keep going in case of EINVAL.
		 */
		break;
	}

	/*
	 * Post the faults one at a time: each ereport is built from a copy
	 * of the caller's record narrowed down to a single fault.
	 */
	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
		mc_aflt_t mc_aflt0;
		if (mc_aflt->mflt_stat[i]->mf_flt_paddr != (uint64_t)-1) {
			mc_aflt0 = *mc_aflt;
			mc_aflt0.mflt_nflts = 1;
			mc_aflt0.mflt_stat[0] = mc_aflt->mflt_stat[i];
			mc_ereport_post(&mc_aflt0);
		}
	}
}

/*
 * The restart address is actually defined in unit of PA[37:6]
 * the mac patrol will convert that to dimm offset.  If the
 * address is not in the bank, it will continue to search for
 * the next PA that is within the bank.
 *
 * Also the mac patrol scans the dimms based on PA, not
 * dimm offset.
 *
 * mcp		board instance owning the bank
 * bank		bank whose patrol is to be restarted
 * rsaddr_info	optional restart information; NULL or mi_valid == 0
 *		requests a plain restart via MAC_PTRL_START()
 *
 * Nothing is restarted while the bank is in rewrite mode.
 * Always returns 0.
 */
static int
restart_patrol(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr_info)
{
	uint64_t pa;
	int rv;

	if (MC_REWRITE_MODE(mcp, bank)) {
		return (0);
	}
	if (rsaddr_info == NULL || (rsaddr_info->mi_valid == 0)) {
		MAC_PTRL_START(mcp, bank);
		return (0);
	}

	rv = mcaddr_to_pa(mcp, &rsaddr_info->mi_restartaddr, &pa);
	if (rv != 0) {
		MC_LOG("cannot convert mcaddr to pa. use auto restart\n");
		MAC_PTRL_START(mcp, bank);
		return (0);
	}

	if (!mc_rangecheck_pa(mcp, pa)) {
		/* pa is not on this board, just retry */
		cmn_err(CE_WARN, "restart_patrol: invalid address %lx "
		    "on board %d\n", pa, mcp->mc_board_num);
		MAC_PTRL_START(mcp, bank);
		return (0);
	}

	MC_LOG("restart_patrol: pa = %lx\n", pa);

	if (!rsaddr_info->mi_injectrestart) {
		/*
		 * For non-error injection restart we need to
		 * determine if the current restart pa/page is
		 * a "good" page. A "good" page is a page that
		 * has not been page retired. If the current
		 * page that contains the pa is "good", we will
		 * do a HW auto restart and let HW patrol continue
		 * where it last stopped. Most desired scenario.
		 *
		 * If the current page is not "good", we will advance
		 * to the next page to find the next "good" page and
		 * restart the patrol from there.
		 */
		int wrapcount = 0;
		uint64_t origpa = pa;
		while (wrapcount < 2) {
			if (!pa_is_valid(mcp, pa)) {
				/*
				 * Not in physinstall - advance to the
				 * next memory isolation blocksize
				 */
				MC_LOG("Invalid PA\n");
				pa = roundup(pa + 1, mc_isolation_bsize);
			} else {
				/* NOTE: shadows the function-scope rv above */
				int rv;
				if ((rv = page_retire_check(pa, NULL)) != 0 &&
				    rv != EAGAIN) {
					/*
					 * The page is "good" (not retired),
					 * we will use automatic HW restart
					 * algorithm if this is the original
					 * current starting page.
					 */
					if (pa == origpa) {
						MC_LOG("Page has no error. "
						    "Auto restart\n");
						MAC_PTRL_START(mcp, bank);
						return (0);
					} else {
						/*
						 * found a subsequent good page
						 */
						break;
					}
				}

				/*
				 * Skip to the next page
				 */
				pa = roundup(pa + 1, PAGESIZE);
				MC_LOG("Skipping bad page to %lx\n", pa);
			}

			/* Check to see if we hit the end of the memory range */
			if (pa >= (mcp->mc_start_address + mcp->mc_size)) {
				MC_LOG("Wrap around\n");
				pa = mcp->mc_start_address;
				wrapcount++;
			}
		}

		/* Two wraps without finding a good page: give up searching */
		if (wrapcount > 1) {
			MC_LOG("Failed to find a good page. Just restart\n");
			MAC_PTRL_START(mcp, bank);
			return (0);
		}
	}

	/*
	 * We reached here either:
	 * 1. We are doing an error injection restart that specify
	 *    the exact pa/page to restart. OR
	 * 2. We found a subsequent good page different from the
	 *    original restart pa/page.
	 * Restart MAC patrol: PA[37:6]
	 */
	MC_LOG("restart at pa = %lx\n", pa);
	ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa));
	MAC_PTRL_START_ADD(mcp, bank);

	return (0);
}

/*
 * Push retry record 'p' onto the head of the singly linked list '*q'.
 * 'p' must not be NULL.
 */
static void
mc_retry_info_put(mc_retry_info_t **q, mc_retry_info_t *p)
{
	ASSERT(p != NULL);
	p->ri_next = *q;
	*q = p;
}

/*
 * Pop and return the head of the singly linked list '*q', or NULL if
 * the list is empty.  The popped record's ri_next is left untouched.
 */
static mc_retry_info_t *
mc_retry_info_get(mc_retry_info_t **q)
{
	mc_retry_info_t *p;

	if ((p = *q) != NULL) {
		*q = p->ri_next;
		return (p);
	} else {
		return (NULL);
	}
}

/*
 * Rewriting is used for two purposes.
 *  - to correct the error in memory.
 *  - to determine whether the error is permanent or intermittent.
 * It's done by writing the address in MAC_BANKm_REWRITE_ADD
 * and issuing REW_REQ command in MAC_BANKm_PTRL_CNRL. After that,
 * REW_END (and REW_CE/REW_UE if some error detected) is set when
 * rewrite operation is done. See 4.7.3 and 4.7.11 in Columbus2 PRM.
 *
 * Note that rewrite operation doesn't change RAW_UE to Marked UE.
 * Therefore, we use it only CE case.
1276 */ 1277 1278 static uint32_t 1279 do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr, int retrying) 1280 { 1281 uint32_t cntl; 1282 int count = 0; 1283 int max_count; 1284 int retry_state; 1285 1286 if (retrying) 1287 max_count = 1; 1288 else 1289 max_count = mc_max_rewrite_loop; 1290 1291 retry_state = RETRY_STATE_PENDING; 1292 1293 if (!retrying && MC_REWRITE_MODE(mcp, bank)) { 1294 goto timeout; 1295 } 1296 1297 retry_state = RETRY_STATE_ACTIVE; 1298 1299 /* first wait to make sure PTRL_STATUS is 0 */ 1300 while (count++ < max_count) { 1301 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 1302 if (!(cntl & MAC_CNTL_PTRL_STATUS)) { 1303 count = 0; 1304 break; 1305 } 1306 drv_usecwait(mc_rewrite_delay); 1307 } 1308 if (count >= max_count) 1309 goto timeout; 1310 1311 count = 0; 1312 1313 ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr); 1314 MAC_REW_REQ(mcp, bank); 1315 1316 retry_state = RETRY_STATE_REWRITE; 1317 1318 do { 1319 if (count++ > max_count) { 1320 goto timeout; 1321 } else { 1322 drv_usecwait(mc_rewrite_delay); 1323 } 1324 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 1325 /* 1326 * If there are other MEMORY or PCI activities, this 1327 * will be BUSY, else it should be set immediately 1328 */ 1329 } while (!(cntl & MAC_CNTL_REW_END)); 1330 1331 MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS); 1332 return (cntl); 1333 timeout: 1334 mc_set_rewrite(mcp, bank, dimm_addr, retry_state); 1335 1336 return (0); 1337 } 1338 1339 void 1340 mc_clear_rewrite(mc_opl_t *mcp, int bank) 1341 { 1342 struct mc_bank *bankp; 1343 mc_retry_info_t *retry; 1344 uint32_t rew_addr; 1345 1346 bankp = &(mcp->mc_bank[bank]); 1347 retry = bankp->mcb_active; 1348 bankp->mcb_active = NULL; 1349 mc_retry_info_put(&bankp->mcb_retry_freelist, retry); 1350 1351 again: 1352 bankp->mcb_rewrite_count = 0; 1353 1354 while (retry = mc_retry_info_get(&bankp->mcb_retry_pending)) { 1355 rew_addr = retry->ri_addr; 1356 mc_retry_info_put(&bankp->mcb_retry_freelist, retry); 1357 if 
(do_rewrite(mcp, bank, rew_addr, 1) == 0) 1358 break; 1359 } 1360 1361 /* we break out if no more pending rewrite or we got timeout again */ 1362 1363 if (!bankp->mcb_active && !bankp->mcb_retry_pending) { 1364 if (!IS_MIRROR(mcp, bank)) { 1365 MC_CLEAR_REWRITE_MODE(mcp, bank); 1366 } else { 1367 int mbank = bank ^ 1; 1368 bankp = &(mcp->mc_bank[mbank]); 1369 if (!bankp->mcb_active && !bankp->mcb_retry_pending) { 1370 MC_CLEAR_REWRITE_MODE(mcp, bank); 1371 MC_CLEAR_REWRITE_MODE(mcp, mbank); 1372 } else { 1373 bank = mbank; 1374 goto again; 1375 } 1376 } 1377 } 1378 } 1379 1380 void 1381 mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state) 1382 { 1383 mc_retry_info_t *retry; 1384 struct mc_bank *bankp; 1385 1386 bankp = &mcp->mc_bank[bank]; 1387 1388 retry = mc_retry_info_get(&bankp->mcb_retry_freelist); 1389 1390 ASSERT(retry != NULL); 1391 1392 retry->ri_addr = addr; 1393 retry->ri_state = state; 1394 1395 MC_SET_REWRITE_MODE(mcp, bank); 1396 1397 if ((state > RETRY_STATE_PENDING)) { 1398 ASSERT(bankp->mcb_active == NULL); 1399 bankp->mcb_active = retry; 1400 } else { 1401 mc_retry_info_put(&bankp->mcb_retry_pending, retry); 1402 } 1403 1404 if (IS_MIRROR(mcp, bank)) { 1405 int mbank = bank ^1; 1406 MC_SET_REWRITE_MODE(mcp, mbank); 1407 } 1408 } 1409 1410 void 1411 mc_process_scf_log(mc_opl_t *mcp) 1412 { 1413 int count; 1414 int n = 0; 1415 scf_log_t *p; 1416 int bank; 1417 1418 for (bank = 0; bank < BANKNUM_PER_SB; bank++) { 1419 while ((p = mcp->mc_scf_log[bank]) != NULL && 1420 (n < mc_max_errlog_processed)) { 1421 ASSERT(bank == p->sl_bank); 1422 count = 0; 1423 while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank)) 1424 & MAC_STATIC_ERR_VLD)) { 1425 if (count++ >= (mc_max_scf_loop)) { 1426 break; 1427 } 1428 drv_usecwait(mc_scf_delay); 1429 } 1430 1431 if (count < mc_max_scf_loop) { 1432 ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank), 1433 p->sl_err_log); 1434 1435 ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank), 1436 
p->sl_err_add|MAC_STATIC_ERR_VLD); 1437 mcp->mc_scf_retry[bank] = 0; 1438 } else { 1439 /* 1440 * if we try too many times, just drop the req 1441 */ 1442 if (mcp->mc_scf_retry[bank]++ <= 1443 mc_max_scf_retry) { 1444 return; 1445 } else { 1446 if ((++mc_pce_dropped & 0xff) == 0) { 1447 cmn_err(CE_WARN, "Cannot " 1448 "report Permanent CE to " 1449 "SCF\n"); 1450 } 1451 } 1452 } 1453 n++; 1454 mcp->mc_scf_log[bank] = p->sl_next; 1455 mcp->mc_scf_total[bank]--; 1456 ASSERT(mcp->mc_scf_total[bank] >= 0); 1457 kmem_free(p, sizeof (scf_log_t)); 1458 } 1459 } 1460 } 1461 void 1462 mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank) 1463 { 1464 scf_log_t *p; 1465 1466 if (mcp->mc_scf_total[bank] >= mc_max_scf_logs) { 1467 if ((++mc_pce_dropped & 0xff) == 0) { 1468 cmn_err(CE_WARN, "Too many Permanent CE requests.\n"); 1469 } 1470 return; 1471 } 1472 p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP); 1473 p->sl_next = 0; 1474 p->sl_err_add = flt_stat->mf_err_add; 1475 p->sl_err_log = flt_stat->mf_err_log; 1476 p->sl_bank = bank; 1477 1478 if (mcp->mc_scf_log[bank] == NULL) { 1479 /* 1480 * we rely on mc_scf_log to detect NULL queue. 1481 * mc_scf_log_tail is irrelevant is such case. 1482 */ 1483 mcp->mc_scf_log_tail[bank] = mcp->mc_scf_log[bank] = p; 1484 } else { 1485 mcp->mc_scf_log_tail[bank]->sl_next = p; 1486 mcp->mc_scf_log_tail[bank] = p; 1487 } 1488 mcp->mc_scf_total[bank]++; 1489 } 1490 /* 1491 * This routine determines what kind of CE happens, intermittent 1492 * or permanent as follows. (See 4.7.3 in Columbus2 PRM.) 1493 * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register. 1494 * - If CE is still detected on the same address even after doing 1495 * rewrite operation twice, it is determined as permanent error. 1496 * - If error is not detected anymore, it is determined as intermittent 1497 * error. 1498 * - If UE is detected due to rewrite operation, it should be treated 1499 * as UE. 
1500 */ 1501 1502 /* ARGSUSED */ 1503 static void 1504 mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error) 1505 { 1506 uint32_t cntl; 1507 int i; 1508 1509 flt_stat->mf_type = FLT_TYPE_PERMANENT_CE; 1510 /* 1511 * rewrite request 1st time reads and correct error data 1512 * and write to DIMM. 2nd rewrite request must be issued 1513 * after REW_CE/UE/END is 0. When the 2nd request is completed, 1514 * if REW_CE = 1, then it is permanent CE. 1515 */ 1516 for (i = 0; i < 2; i++) { 1517 cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add, 0); 1518 1519 if (cntl == 0) { 1520 /* timeout case */ 1521 return; 1522 } 1523 /* 1524 * If the error becomes UE or CMPE 1525 * we return to the caller immediately. 1526 */ 1527 if (cntl & MAC_CNTL_REW_UE) { 1528 if (ptrl_error) 1529 flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE; 1530 else 1531 flt_stat->mf_cntl |= MAC_CNTL_MI_UE; 1532 flt_stat->mf_type = FLT_TYPE_UE; 1533 return; 1534 } 1535 if (cntl & MAC_CNTL_REW_CMPE) { 1536 if (ptrl_error) 1537 flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE; 1538 else 1539 flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE; 1540 flt_stat->mf_type = FLT_TYPE_CMPE; 1541 return; 1542 } 1543 } 1544 if (!(cntl & MAC_CNTL_REW_CE)) { 1545 flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE; 1546 } 1547 1548 if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 1549 /* report PERMANENT_CE to SP via SCF */ 1550 if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) { 1551 mc_queue_scf_log(mcp, flt_stat, bank); 1552 } 1553 } 1554 } 1555 1556 #define IS_CMPE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE :\ 1557 MAC_CNTL_MI_CMPE)) 1558 #define IS_UE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE)) 1559 #define IS_CE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE)) 1560 #define IS_OK(cntl, f) (!((cntl) & ((f) ? 
MAC_CNTL_PTRL_ERRS : \ 1561 MAC_CNTL_MI_ERRS))) 1562 1563 1564 static int 1565 IS_CE_ONLY(uint32_t cntl, int ptrl_error) 1566 { 1567 if (ptrl_error) { 1568 return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE); 1569 } else { 1570 return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE); 1571 } 1572 } 1573 1574 void 1575 mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value) 1576 { 1577 int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank; 1578 1579 if (mcp->mc_speedup_period[ebank] > 0) 1580 value |= mc_max_speed; 1581 else 1582 value |= mcp->mc_speed; 1583 ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value); 1584 } 1585 1586 static void 1587 mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat) 1588 { 1589 flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1590 MAC_CNTL_PTRL_ERRS; 1591 flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank)); 1592 flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank)); 1593 flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num; 1594 flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num; 1595 flt_stat->mf_flt_maddr.ma_bank = bank; 1596 flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add; 1597 } 1598 1599 static void 1600 mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat) 1601 { 1602 uint32_t status, old_status; 1603 1604 status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & MAC_CNTL_MI_ERRS; 1605 old_status = 0; 1606 1607 /* we keep reading until the status is stable */ 1608 while (old_status != status) { 1609 old_status = status; 1610 flt_stat->mf_err_add = LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank)); 1611 flt_stat->mf_err_log = LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank)); 1612 status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1613 MAC_CNTL_MI_ERRS; 1614 if (status == old_status) { 1615 break; 1616 } 1617 } 1618 1619 flt_stat->mf_cntl = status; 1620 flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num; 1621 flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num; 1622 
flt_stat->mf_flt_maddr.ma_bank = bank; 1623 flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add; 1624 } 1625 1626 1627 /* 1628 * Error philosophy for mirror mode: 1629 * 1630 * PTRL (The error address for both banks are same, since ptrl stops if it 1631 * detects error.) 1632 * - Compare error log CMPE. 1633 * 1634 * - UE-UE Report MUE. No rewrite. 1635 * 1636 * - UE-* UE-(CE/OK). Rewrite to scrub UE. Report SUE. 1637 * 1638 * - CE-* CE-(CE/OK). Scrub to determine if CE is permanent. 1639 * If CE is permanent, inform SCF. Once for each 1640 * Dimm. If CE becomes UE or CMPE, go back to above. 1641 * 1642 * 1643 * MI (The error addresses for each bank are the same or different.) 1644 * - Compare error If addresses are the same. Just CMPE, so log CMPE. 1645 * If addresses are different (this could happen 1646 * as a result of scrubbing. Report each separately. 1647 * Only report error info on each side. 1648 * 1649 * - UE-UE Addresses are the same. Report MUE. 1650 * Addresses are different. Report SUE on each bank. 1651 * Rewrite to clear UE. 1652 * 1653 * - UE-* UE-(CE/OK) 1654 * Rewrite to clear UE. Report SUE for the bank. 1655 * 1656 * - CE-* CE-(CE/OK). Scrub to determine if CE is permanent. 1657 * If CE becomes UE or CMPE, go back to above. 
1658 * 1659 */ 1660 1661 static int 1662 mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat) 1663 { 1664 int ptrl_error = mc_aflt->mflt_is_ptrl; 1665 int i; 1666 int rv = 0; 1667 int bank; 1668 int rewrite_timeout = 0; 1669 1670 MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n", 1671 flt_stat[0].mf_cntl, flt_stat[1].mf_cntl); 1672 1673 if (ptrl_error) { 1674 if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) & 1675 MAC_CNTL_PTRL_ERRS) == 0) 1676 return (0); 1677 } else { 1678 if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) & 1679 MAC_CNTL_MI_ERRS) == 0) 1680 return (0); 1681 } 1682 1683 /* 1684 * First we take care of the case of CE 1685 * because they can become UE or CMPE 1686 */ 1687 for (i = 0; i < 2; i++) { 1688 if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) { 1689 bank = flt_stat[i].mf_flt_maddr.ma_bank; 1690 MC_LOG("CE detected on bank %d\n", bank); 1691 mc_scrub_ce(mcp, bank, &flt_stat[i], ptrl_error); 1692 if (MC_REWRITE_ACTIVE(mcp, bank)) { 1693 rewrite_timeout = 1; 1694 } 1695 rv = 1; 1696 } 1697 } 1698 1699 if (rewrite_timeout) 1700 return (0); 1701 1702 /* The above scrubbing can turn CE into UE or CMPE */ 1703 1704 /* 1705 * Now we distinguish two cases: same address or not 1706 * the same address. It might seem more intuitive to 1707 * distinguish PTRL v.s. MI error but it is more 1708 * complicated that way. 1709 */ 1710 1711 if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) { 1712 1713 if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) || 1714 IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) { 1715 flt_stat[0].mf_type = FLT_TYPE_CMPE; 1716 flt_stat[1].mf_type = FLT_TYPE_CMPE; 1717 mc_aflt->mflt_erpt_class = MC_OPL_CMPE; 1718 mc_aflt->mflt_nflts = 2; 1719 mc_aflt->mflt_stat[0] = &flt_stat[0]; 1720 mc_aflt->mflt_stat[1] = &flt_stat[1]; 1721 mc_aflt->mflt_pr = PR_UE; 1722 /* 1723 * Compare error is result of MAC internal error, so 1724 * simply log it instead of publishing an ereport. 
SCF 1725 * diagnoses all the MAC internal and its i/f error. 1726 */ 1727 MC_LOG("cmpe error detected\n"); 1728 return (1); 1729 } 1730 1731 if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) && 1732 IS_UE(flt_stat[1].mf_cntl, ptrl_error)) { 1733 /* Both side are UE's */ 1734 1735 MAC_SET_ERRLOG_INFO(&flt_stat[0]); 1736 MAC_SET_ERRLOG_INFO(&flt_stat[1]); 1737 MC_LOG("MUE detected\n"); 1738 flt_stat[0].mf_type = FLT_TYPE_MUE; 1739 flt_stat[1].mf_type = FLT_TYPE_MUE; 1740 mc_aflt->mflt_erpt_class = MC_OPL_MUE; 1741 mc_aflt->mflt_nflts = 2; 1742 mc_aflt->mflt_stat[0] = &flt_stat[0]; 1743 mc_aflt->mflt_stat[1] = &flt_stat[1]; 1744 mc_aflt->mflt_pr = PR_UE; 1745 mc_err_drain(mc_aflt); 1746 return (1); 1747 } 1748 1749 /* Now the only case is UE/CE, UE/OK, or don't care */ 1750 for (i = 0; i < 2; i++) { 1751 if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) { 1752 1753 /* rewrite can clear the one side UE error */ 1754 1755 if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) { 1756 (void) do_rewrite(mcp, 1757 flt_stat[i].mf_flt_maddr.ma_bank, 1758 flt_stat[i].mf_flt_maddr.ma_dimm_addr, 0); 1759 } 1760 flt_stat[i].mf_type = FLT_TYPE_UE; 1761 MAC_SET_ERRLOG_INFO(&flt_stat[i]); 1762 mc_aflt->mflt_erpt_class = MC_OPL_SUE; 1763 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1764 mc_aflt->mflt_nflts = 1; 1765 mc_aflt->mflt_pr = PR_MCE; 1766 mc_err_drain(mc_aflt); 1767 /* Once we hit a UE/CE or UE/OK case, done */ 1768 return (1); 1769 } 1770 } 1771 1772 } else { 1773 /* 1774 * addresses are different. That means errors 1775 * on the 2 banks are not related at all. 1776 */ 1777 for (i = 0; i < 2; i++) { 1778 if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) { 1779 flt_stat[i].mf_type = FLT_TYPE_CMPE; 1780 mc_aflt->mflt_erpt_class = MC_OPL_CMPE; 1781 mc_aflt->mflt_nflts = 1; 1782 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1783 mc_aflt->mflt_pr = PR_UE; 1784 /* 1785 * Compare error is result of MAC internal 1786 * error, so simply log it instead of 1787 * publishing an ereport. 
SCF diagnoses all 1788 * the MAC internal and its interface error. 1789 */ 1790 MC_LOG("cmpe error detected\n"); 1791 /* no more report on this bank */ 1792 flt_stat[i].mf_cntl = 0; 1793 rv = 1; 1794 } 1795 } 1796 1797 /* rewrite can clear the one side UE error */ 1798 1799 for (i = 0; i < 2; i++) { 1800 if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) { 1801 (void) do_rewrite(mcp, 1802 flt_stat[i].mf_flt_maddr.ma_bank, 1803 flt_stat[i].mf_flt_maddr.ma_dimm_addr, 1804 0); 1805 flt_stat[i].mf_type = FLT_TYPE_UE; 1806 MAC_SET_ERRLOG_INFO(&flt_stat[i]); 1807 mc_aflt->mflt_erpt_class = MC_OPL_SUE; 1808 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1809 mc_aflt->mflt_nflts = 1; 1810 mc_aflt->mflt_pr = PR_MCE; 1811 mc_err_drain(mc_aflt); 1812 rv = 1; 1813 } 1814 } 1815 } 1816 return (rv); 1817 } 1818 static void 1819 mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr) 1820 { 1821 mc_aflt_t mc_aflt; 1822 mc_flt_stat_t flt_stat[2], mi_flt_stat[2]; 1823 int i; 1824 int mi_valid; 1825 1826 ASSERT(rsaddr); 1827 1828 bzero(&mc_aflt, sizeof (mc_aflt_t)); 1829 bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t)); 1830 bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t)); 1831 1832 1833 mc_aflt.mflt_mcp = mcp; 1834 mc_aflt.mflt_id = gethrtime(); 1835 1836 /* Now read all the registers into flt_stat */ 1837 1838 for (i = 0; i < 2; i++) { 1839 MC_LOG("Reading registers of bank %d\n", bank); 1840 /* patrol registers */ 1841 mc_read_ptrl_reg(mcp, bank, &flt_stat[i]); 1842 1843 /* 1844 * In mirror mode, it is possible that only one bank 1845 * may report the error. We need to check for it to 1846 * ensure we pick the right addr value for patrol restart. 1847 * Note that if both banks reported errors, we pick the 1848 * 2nd one. Both banks should reported the same error address. 
1849 */ 1850 if (flt_stat[i].mf_cntl & MAC_CNTL_PTRL_ERRS) 1851 rsaddr->mi_restartaddr = flt_stat[i].mf_flt_maddr; 1852 1853 MC_LOG("ptrl registers cntl %x add %x log %x\n", 1854 flt_stat[i].mf_cntl, flt_stat[i].mf_err_add, 1855 flt_stat[i].mf_err_log); 1856 1857 /* MI registers */ 1858 mc_read_mi_reg(mcp, bank, &mi_flt_stat[i]); 1859 1860 MC_LOG("MI registers cntl %x add %x log %x\n", 1861 mi_flt_stat[i].mf_cntl, mi_flt_stat[i].mf_err_add, 1862 mi_flt_stat[i].mf_err_log); 1863 1864 bank = bank^1; 1865 } 1866 1867 /* clear errors once we read all the registers */ 1868 MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1869 1870 MAC_CLEAR_ERRS(mcp, bank ^ 1, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1871 1872 /* Process MI errors first */ 1873 1874 /* if not error mode, cntl1 is 0 */ 1875 if ((mi_flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) || 1876 (mi_flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID)) 1877 mi_flt_stat[0].mf_cntl = 0; 1878 1879 if ((mi_flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) || 1880 (mi_flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID)) 1881 mi_flt_stat[1].mf_cntl = 0; 1882 1883 mc_aflt.mflt_is_ptrl = 0; 1884 mi_valid = mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]); 1885 1886 if ((((flt_stat[0].mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1887 MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[0].mf_cntl & 1888 MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) && 1889 (flt_stat[0].mf_err_add == mi_flt_stat[0].mf_err_add) && 1890 (((flt_stat[1].mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1891 MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[1].mf_cntl & 1892 MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) && 1893 (flt_stat[1].mf_err_add == mi_flt_stat[1].mf_err_add)) { 1894 #ifdef DEBUG 1895 MC_LOG("discarding PTRL error because " 1896 "it is the same as MI\n"); 1897 #endif 1898 rsaddr->mi_valid = mi_valid; 1899 return; 1900 } 1901 /* if not error mode, cntl1 is 0 */ 1902 if ((flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) || 1903 (flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID)) 
1904 flt_stat[0].mf_cntl = 0; 1905 1906 if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) || 1907 (flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID)) 1908 flt_stat[1].mf_cntl = 0; 1909 1910 mc_aflt.mflt_is_ptrl = 1; 1911 rsaddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]); 1912 } 1913 static int 1914 mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt, 1915 mc_flt_stat_t *flt_stat) 1916 { 1917 int ptrl_error = mc_aflt->mflt_is_ptrl; 1918 int rv = 0; 1919 1920 mc_aflt->mflt_erpt_class = NULL; 1921 if (IS_UE(flt_stat->mf_cntl, ptrl_error)) { 1922 MC_LOG("UE detected\n"); 1923 flt_stat->mf_type = FLT_TYPE_UE; 1924 mc_aflt->mflt_erpt_class = MC_OPL_UE; 1925 mc_aflt->mflt_pr = PR_UE; 1926 MAC_SET_ERRLOG_INFO(flt_stat); 1927 rv = 1; 1928 } else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) { 1929 MC_LOG("CE detected\n"); 1930 MAC_SET_ERRLOG_INFO(flt_stat); 1931 1932 /* Error type can change after scrubbing */ 1933 mc_scrub_ce(mcp, bank, flt_stat, ptrl_error); 1934 if (MC_REWRITE_ACTIVE(mcp, bank)) { 1935 return (0); 1936 } 1937 1938 if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE) { 1939 mc_aflt->mflt_erpt_class = MC_OPL_ICE; 1940 mc_aflt->mflt_pr = PR_MCE; 1941 } else if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 1942 mc_aflt->mflt_erpt_class = MC_OPL_CE; 1943 mc_aflt->mflt_pr = PR_MCE; 1944 } else if (flt_stat->mf_type == FLT_TYPE_UE) { 1945 mc_aflt->mflt_erpt_class = MC_OPL_UE; 1946 mc_aflt->mflt_pr = PR_UE; 1947 } 1948 rv = 1; 1949 } 1950 MC_LOG("mc_process_error: fault type %x erpt %s\n", flt_stat->mf_type, 1951 mc_aflt->mflt_erpt_class); 1952 if (mc_aflt->mflt_erpt_class) { 1953 mc_aflt->mflt_stat[0] = flt_stat; 1954 mc_aflt->mflt_nflts = 1; 1955 mc_err_drain(mc_aflt); 1956 } 1957 return (rv); 1958 } 1959 1960 static void 1961 mc_error_handler(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr) 1962 { 1963 mc_aflt_t mc_aflt; 1964 mc_flt_stat_t flt_stat, mi_flt_stat; 1965 int mi_valid; 1966 1967 bzero(&mc_aflt, sizeof (mc_aflt_t)); 1968 
bzero(&flt_stat, sizeof (mc_flt_stat_t)); 1969 bzero(&mi_flt_stat, sizeof (mc_flt_stat_t)); 1970 1971 mc_aflt.mflt_mcp = mcp; 1972 mc_aflt.mflt_id = gethrtime(); 1973 1974 /* patrol registers */ 1975 mc_read_ptrl_reg(mcp, bank, &flt_stat); 1976 1977 ASSERT(rsaddr); 1978 rsaddr->mi_restartaddr = flt_stat.mf_flt_maddr; 1979 1980 MC_LOG("ptrl registers cntl %x add %x log %x\n", flt_stat.mf_cntl, 1981 flt_stat.mf_err_add, flt_stat.mf_err_log); 1982 1983 /* MI registers */ 1984 mc_read_mi_reg(mcp, bank, &mi_flt_stat); 1985 1986 1987 MC_LOG("MI registers cntl %x add %x log %x\n", mi_flt_stat.mf_cntl, 1988 mi_flt_stat.mf_err_add, mi_flt_stat.mf_err_log); 1989 1990 /* clear errors once we read all the registers */ 1991 MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1992 1993 mc_aflt.mflt_is_ptrl = 0; 1994 if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) && 1995 ((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) && 1996 ((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) { 1997 mi_valid = mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat); 1998 } 1999 2000 if ((((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) >> 2001 MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat.mf_cntl & 2002 MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) && 2003 (flt_stat.mf_err_add == mi_flt_stat.mf_err_add)) { 2004 #ifdef DEBUG 2005 MC_LOG("discarding PTRL error because " 2006 "it is the same as MI\n"); 2007 #endif 2008 rsaddr->mi_valid = mi_valid; 2009 return; 2010 } 2011 2012 mc_aflt.mflt_is_ptrl = 1; 2013 if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) && 2014 ((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) && 2015 ((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) { 2016 rsaddr->mi_valid = mc_process_error(mcp, bank, &mc_aflt, 2017 &flt_stat); 2018 } 2019 } 2020 /* 2021 * memory patrol error handling algorithm: 2022 * timeout() is used to do periodic polling 2023 * This is the flow chart. 
 * timeout ->
 * mc_check_errors()
 *	if memory bank is installed, read the status register
 *	if any error bit is set,
 *	-> mc_error_handler()
 *		-> read all error registers
 *		-> mc_process_error()
 *			determine error type
 *			rewrite to clear error or scrub to determine CE type
 *			inform SCF on permanent CE
 *			-> mc_err_drain
 *				page offline processing
 *				-> mc_ereport_post()
 */

/*
 * Advance the bank's active (previously timed out) rewrite request, if
 * there is one.
 *
 * A request that has not yet been issued to the hardware
 * (ri_state <= RETRY_STATE_ACTIVE) is issued once PTRL_STATUS is clear.
 * Then, if REW_END is set, the rewrite error bits are cleared and
 * mc_clear_rewrite() moves on to the pending requests; otherwise the
 * bank's rewrite counter is bumped and a warning is printed when it
 * reaches mc_max_rewrite_retry.
 */
static void
mc_process_rewrite(mc_opl_t *mcp, int bank)
{
	uint32_t rew_addr, cntl;
	mc_retry_info_t *retry;
	struct mc_bank *bankp;

	bankp = &(mcp->mc_bank[bank]);
	retry = bankp->mcb_active;
	if (retry == NULL)
		return;

	if (retry->ri_state <= RETRY_STATE_ACTIVE) {
		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
		/* MAC still busy; try again on the next poll */
		if (cntl & MAC_CNTL_PTRL_STATUS)
			return;
		rew_addr = retry->ri_addr;
		ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), rew_addr);
		MAC_REW_REQ(mcp, bank);

		retry->ri_state = RETRY_STATE_REWRITE;
	}

	cntl = ldphysio(MAC_PTRL_CNTL(mcp, bank));

	if (cntl & MAC_CNTL_REW_END) {
		MAC_CLEAR_ERRS(mcp, bank,
		    MAC_CNTL_REW_ERRS);
		mc_clear_rewrite(mcp, bank);
	} else {
		/*
		 * If the rewrite does not complete in
		 * 1 hour, we have to consider this a HW
		 * failure.  However, there is no recovery
		 * mechanism.  The only thing we can do
		 * is to print a warning message to the
		 * console.  We continue to increment the
		 * counter but we only print the message
		 * once.  It will take the counter a long
		 * time to wrap around and the user might
		 * see a second message.  In practice,
		 * we have never hit this condition but
		 * we have to keep the code here just in case.
		 */
		if (++mcp->mc_bank[bank].mcb_rewrite_count
		    == mc_max_rewrite_retry) {
			cmn_err(CE_WARN, "Memory patrol feature is"
			    " partly suspended on /LSB%d/B%d"
			    " due to heavy memory load,"
			    " and it will restart"
			    " automatically.\n", mcp->mc_board_num,
			    bank);
		}
	}
}

/*
 * Poll every installed bank of one board.
 *
 * For each bank: advance any outstanding rewrite, read the patrol
 * status and control registers, account for a completed patrol pass,
 * and then, depending on whether the HW patrol is still running,
 *  - running: manage the temporary scan speed-up used to flush out
 *    MI errors that did not stop the patrol;
 *  - stopped with errors: collect/report them and restart the patrol;
 *  - stopped without errors: simply restart the patrol.
 *
 * mc_last_error accumulates the number of error events found on
 * consecutive calls and is reset to 0 by a call that finds none.
 * Boards flagged MC_MEMORYLESS are skipped entirely.
 */
static void
mc_check_errors_func(mc_opl_t *mcp)
{
	mc_rsaddr_info_t rsaddr_info;
	int i, error_count = 0;
	uint32_t stat, cntl;
	int running;
	int wrapped;
	int ebk;

	/*
	 * scan errors.
	 */
	if (mcp->mc_status & MC_MEMORYLESS)
		return;

	for (i = 0; i < BANKNUM_PER_SB; i++) {
		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
			if (MC_REWRITE_ACTIVE(mcp, i)) {
				mc_process_rewrite(mcp, i);
			}
			stat = ldphysio(MAC_PTRL_STAT(mcp, i));
			cntl = ldphysio(MAC_PTRL_CNTL(mcp, i));
			running = cntl & MAC_CNTL_PTRL_START;
			wrapped = cntl & MAC_CNTL_PTRL_ADD_MAX;

			/* Compute the effective bank idx */
			ebk = (IS_MIRROR(mcp, i)) ? MIRROR_IDX(i) : i;

			if (mc_debug_show_all || stat) {
				MC_LOG("/LSB%d/B%d stat %x cntl %x\n",
				    mcp->mc_board_num, i, stat, cntl);
			}

			/*
			 * Update stats and reset flag if the HW patrol
			 * wrapped around in its scan.
			 */
			if (wrapped) {
				MAC_CLEAR_MAX(mcp, i);
				mcp->mc_period[ebk]++;
				if (IS_MIRROR(mcp, i))
					MC_LOG("mirror mc period %ld on "
					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
					    mcp->mc_board_num, i);
				else {
					MC_LOG("mc period %ld on "
					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
					    mcp->mc_board_num, i);
				}
			}

			if (running) {
				/*
				 * Mac patrol HW is still running.
				 * Normally when an error is detected,
				 * the HW patrol will stop so that we
				 * can collect error data for reporting.
				 * Certain errors (MI errors) detected may not
				 * cause the HW patrol to stop which is a
				 * problem since we cannot read error data while
				 * the HW patrol is running. SW is not allowed
				 * to stop the HW patrol while it is running
				 * as it may cause HW inconsistency. This is
				 * described in a HW errata.
				 * In situations where we detected errors
				 * that may not cause the HW patrol to stop.
				 * We speed up the HW patrol scanning in
				 * the hope that it will find the 'real' PTRL
				 * errors associated with the previous errors
				 * causing the HW to finally stop so that we
				 * can do the reporting.
				 */
				/*
				 * Check to see if we did speed up
				 * the HW patrol due to previous errors
				 * detected that did not cause the patrol
				 * to stop. We only do it if HW patrol scan
				 * wrapped (counted as completing a 'period').
				 */
				if (mcp->mc_speedup_period[ebk] > 0) {
					if (wrapped &&
					    (--mcp->mc_speedup_period[ebk] ==
					    0)) {
						/*
						 * We did try to speed up.
						 * The speed up period has
						 * expired and the HW patrol
						 * is still running.  The
						 * errors must be intermittent.
						 * We have no choice but to
						 * ignore them, reset the scan
						 * speed to normal and clear
						 * the MI error bits. For
						 * mirror mode, we need to
						 * clear errors on both banks.
						 */
						MC_LOG("Clearing MI errors\n");
						MAC_CLEAR_ERRS(mcp, i,
						    MAC_CNTL_MI_ERRS);

						if (IS_MIRROR(mcp, i)) {
							MC_LOG("Clearing "
							    "Mirror MI errs\n");
							MAC_CLEAR_ERRS(mcp,
							    i^1,
							    MAC_CNTL_MI_ERRS);
						}
					}
				} else if (stat & MAC_STAT_MI_ERRS) {
					/*
					 * MI errors detected but we cannot
					 * report them since the HW patrol
					 * is still running.
					 * We will attempt to speed up the
					 * scanning and hopefully the HW
					 * can detect PTRL errors at the same
					 * location that cause the HW patrol
					 * to stop.
					 */
					mcp->mc_speedup_period[ebk] = 2;
					MAC_CMD(mcp, i, 0);
				}
			} else if (stat & (MAC_STAT_PTRL_ERRS |
			    MAC_STAT_MI_ERRS)) {
				/*
				 * HW Patrol has stopped and we found errors.
				 * Proceed to collect and report error info.
				 */
				mcp->mc_speedup_period[ebk] = 0;
				rsaddr_info.mi_valid = 0;
				rsaddr_info.mi_injectrestart = 0;
				if (IS_MIRROR(mcp, i)) {
					mc_error_handler_mir(mcp, i,
					    &rsaddr_info);
				} else {
					mc_error_handler(mcp, i, &rsaddr_info);
				}

				error_count++;
				restart_patrol(mcp, i, &rsaddr_info);
			} else {
				/*
				 * HW patrol scan has apparently stopped
				 * but no errors detected/flagged.
				 * Restart the HW patrol just to be sure.
				 * In mirror mode, the odd bank might have
				 * reported errors that caused the patrol to
				 * stop. We'll defer the restart to the odd
				 * bank in this case.
				 */
				if (!IS_MIRROR(mcp, i) || (i & 0x1))
					restart_patrol(mcp, i, NULL);
			}
		}
	}
	if (error_count > 0)
		mcp->mc_last_error += error_count;
	else
		mcp->mc_last_error = 0;
}

/*
 * mc_polling -- Check errors for only one instance,
 * but process errors for all instances to make sure we drain the errors
 * faster than they can be accumulated.
 *
 * Polling on each board should be done only once per each
 * mc_patrol_interval_sec.  This is equivalent to setting mc_tick_left
 * to OPL_MAX_BOARDS and decrement by 1 on each timeout.
 * Once mc_tick_left becomes negative, the board becomes a candidate
 * for polling because it has waited for at least
 * mc_patrol_interval_sec's long.    If mc_timeout_period is calculated
 * differently, this has to be updated accordingly.
2269 */ 2270 2271 static void 2272 mc_polling(void) 2273 { 2274 int i, scan_error; 2275 mc_opl_t *mcp; 2276 2277 2278 scan_error = 1; 2279 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2280 mutex_enter(&mcmutex); 2281 if ((mcp = mc_instances[i]) == NULL) { 2282 mutex_exit(&mcmutex); 2283 continue; 2284 } 2285 mutex_enter(&mcp->mc_lock); 2286 mutex_exit(&mcmutex); 2287 if (!(mcp->mc_status & MC_POLL_RUNNING)) { 2288 mutex_exit(&mcp->mc_lock); 2289 continue; 2290 } 2291 if (scan_error && mcp->mc_tick_left <= 0) { 2292 mc_check_errors_func((void *)mcp); 2293 mcp->mc_tick_left = OPL_MAX_BOARDS; 2294 scan_error = 0; 2295 } else { 2296 mcp->mc_tick_left--; 2297 } 2298 mc_process_scf_log(mcp); 2299 mutex_exit(&mcp->mc_lock); 2300 } 2301 } 2302 2303 static void 2304 get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr) 2305 { 2306 maddr->ma_bd = mcp->mc_board_num; 2307 maddr->ma_bank = bank; 2308 maddr->ma_dimm_addr = 0; 2309 } 2310 2311 typedef struct mc_mem_range { 2312 uint64_t addr; 2313 uint64_t size; 2314 } mc_mem_range_t; 2315 2316 static int 2317 get_base_address(mc_opl_t *mcp) 2318 { 2319 mc_mem_range_t *mem_range; 2320 int len; 2321 2322 if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2323 "sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) { 2324 return (DDI_FAILURE); 2325 } 2326 2327 mcp->mc_start_address = mem_range->addr; 2328 mcp->mc_size = mem_range->size; 2329 2330 kmem_free(mem_range, len); 2331 return (DDI_SUCCESS); 2332 } 2333 2334 struct mc_addr_spec { 2335 uint32_t bank; 2336 uint32_t phys_hi; 2337 uint32_t phys_lo; 2338 }; 2339 2340 #define REGS_PA(m, i) ((((uint64_t)m[i].phys_hi)<<32) | m[i].phys_lo) 2341 2342 static char *mc_tbl_name[] = { 2343 "cs0-mc-pa-trans-table", 2344 "cs1-mc-pa-trans-table" 2345 }; 2346 2347 /* 2348 * This routine performs a rangecheck for a given PA 2349 * to see if it belongs to the memory range for this board. 
2350 * Return 1 if it is valid (within the range) and 0 otherwise 2351 */ 2352 static int 2353 mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa) 2354 { 2355 if ((pa < mcp->mc_start_address) || (mcp->mc_start_address + 2356 mcp->mc_size <= pa)) 2357 return (0); 2358 else 2359 return (1); 2360 } 2361 2362 static void 2363 mc_memlist_delete(struct memlist *mlist) 2364 { 2365 struct memlist *ml; 2366 2367 for (ml = mlist; ml; ml = mlist) { 2368 mlist = ml->next; 2369 kmem_free(ml, sizeof (struct memlist)); 2370 } 2371 } 2372 2373 static struct memlist * 2374 mc_memlist_dup(struct memlist *mlist) 2375 { 2376 struct memlist *hl = NULL, *tl, **mlp; 2377 2378 if (mlist == NULL) 2379 return (NULL); 2380 2381 mlp = &hl; 2382 tl = *mlp; 2383 for (; mlist; mlist = mlist->next) { 2384 *mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP); 2385 (*mlp)->address = mlist->address; 2386 (*mlp)->size = mlist->size; 2387 (*mlp)->prev = tl; 2388 tl = *mlp; 2389 mlp = &((*mlp)->next); 2390 } 2391 *mlp = NULL; 2392 2393 return (hl); 2394 } 2395 2396 2397 static struct memlist * 2398 mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len) 2399 { 2400 uint64_t end; 2401 struct memlist *ml, *tl, *nlp; 2402 2403 if (mlist == NULL) 2404 return (NULL); 2405 2406 end = base + len; 2407 if ((end <= mlist->address) || (base == end)) 2408 return (mlist); 2409 2410 for (tl = ml = mlist; ml; tl = ml, ml = nlp) { 2411 uint64_t mend; 2412 2413 nlp = ml->next; 2414 2415 if (end <= ml->address) 2416 break; 2417 2418 mend = ml->address + ml->size; 2419 if (base < mend) { 2420 if (base <= ml->address) { 2421 ml->address = end; 2422 if (end >= mend) 2423 ml->size = 0ull; 2424 else 2425 ml->size = mend - ml->address; 2426 } else { 2427 ml->size = base - ml->address; 2428 if (end < mend) { 2429 struct memlist *nl; 2430 /* 2431 * splitting an memlist entry. 
2432 */ 2433 nl = kmem_alloc(sizeof (struct memlist), 2434 KM_SLEEP); 2435 nl->address = end; 2436 nl->size = mend - nl->address; 2437 if ((nl->next = nlp) != NULL) 2438 nlp->prev = nl; 2439 nl->prev = ml; 2440 ml->next = nl; 2441 nlp = nl; 2442 } 2443 } 2444 if (ml->size == 0ull) { 2445 if (ml == mlist) { 2446 if ((mlist = nlp) != NULL) 2447 nlp->prev = NULL; 2448 kmem_free(ml, sizeof (struct memlist)); 2449 if (mlist == NULL) 2450 break; 2451 ml = nlp; 2452 } else { 2453 if ((tl->next = nlp) != NULL) 2454 nlp->prev = tl; 2455 kmem_free(ml, sizeof (struct memlist)); 2456 ml = tl; 2457 } 2458 } 2459 } 2460 } 2461 2462 return (mlist); 2463 } 2464 2465 static void 2466 mc_get_mlist(mc_opl_t *mcp) 2467 { 2468 struct memlist *mlist; 2469 2470 memlist_read_lock(); 2471 mlist = mc_memlist_dup(phys_install); 2472 memlist_read_unlock(); 2473 2474 if (mlist) { 2475 mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address); 2476 } 2477 2478 if (mlist) { 2479 uint64_t startpa, endpa; 2480 2481 startpa = mcp->mc_start_address + mcp->mc_size; 2482 endpa = ptob(physmax + 1); 2483 if (endpa > startpa) { 2484 mlist = mc_memlist_del_span(mlist, startpa, 2485 endpa - startpa); 2486 } 2487 } 2488 2489 if (mlist) { 2490 mcp->mlist = mlist; 2491 } 2492 } 2493 2494 int 2495 mc_board_add(mc_opl_t *mcp) 2496 { 2497 struct mc_addr_spec *macaddr; 2498 cs_status_t *cs_status; 2499 int len, len1, i, bk, cc; 2500 mc_rsaddr_info_t rsaddr; 2501 uint32_t mirr; 2502 int nbanks = 0; 2503 uint64_t nbytes = 0; 2504 int mirror_mode = 0; 2505 int ret; 2506 2507 /* 2508 * Get configurations from "pseudo-mc" node which includes: 2509 * board# : LSB number 2510 * mac-addr : physical base address of MAC registers 2511 * csX-mac-pa-trans-table: translation table from DIMM address 2512 * to physical address or vice versa. 
2513 */ 2514 mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip, 2515 DDI_PROP_DONTPASS, "board#", -1); 2516 2517 if (mcp->mc_board_num == -1) { 2518 return (DDI_FAILURE); 2519 } 2520 2521 /* 2522 * Get start address in this CAB. It can be gotten from 2523 * "sb-mem-ranges" property. 2524 */ 2525 2526 if (get_base_address(mcp) == DDI_FAILURE) { 2527 return (DDI_FAILURE); 2528 } 2529 /* get mac-pa trans tables */ 2530 for (i = 0; i < MC_TT_CS; i++) { 2531 len = MC_TT_ENTRIES; 2532 cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip, 2533 DDI_PROP_DONTPASS, mc_tbl_name[i], 2534 (caddr_t)mcp->mc_trans_table[i], &len); 2535 2536 if (cc != DDI_SUCCESS) { 2537 bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES); 2538 } 2539 } 2540 mcp->mlist = NULL; 2541 2542 mc_get_mlist(mcp); 2543 2544 /* initialize bank informations */ 2545 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2546 "mc-addr", (caddr_t)&macaddr, &len); 2547 if (cc != DDI_SUCCESS) { 2548 cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc); 2549 return (DDI_FAILURE); 2550 } 2551 2552 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2553 "cs-status", (caddr_t)&cs_status, &len1); 2554 2555 if (cc != DDI_SUCCESS) { 2556 if (len > 0) 2557 kmem_free(macaddr, len); 2558 cmn_err(CE_WARN, "Cannot get cs-status. 
err=%d\n", cc); 2559 return (DDI_FAILURE); 2560 } 2561 /* get the physical board number for a given logical board number */ 2562 mcp->mc_phys_board_num = mc_opl_get_physical_board(mcp->mc_board_num); 2563 2564 if (mcp->mc_phys_board_num < 0) { 2565 if (len > 0) 2566 kmem_free(macaddr, len); 2567 cmn_err(CE_WARN, "Unable to obtain the physical board number"); 2568 return (DDI_FAILURE); 2569 } 2570 2571 mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL); 2572 2573 for (i = 0; i < len1 / sizeof (cs_status_t); i++) { 2574 nbytes += ((uint64_t)cs_status[i].cs_avail_hi << 32) | 2575 ((uint64_t)cs_status[i].cs_avail_low); 2576 } 2577 if (len1 > 0) 2578 kmem_free(cs_status, len1); 2579 nbanks = len / sizeof (struct mc_addr_spec); 2580 2581 if (nbanks > 0) 2582 nbytes /= nbanks; 2583 else { 2584 /* No need to free macaddr because len must be 0 */ 2585 mcp->mc_status |= MC_MEMORYLESS; 2586 return (DDI_SUCCESS); 2587 } 2588 2589 for (i = 0; i < BANKNUM_PER_SB; i++) { 2590 mcp->mc_scf_retry[i] = 0; 2591 mcp->mc_period[i] = 0; 2592 mcp->mc_speedup_period[i] = 0; 2593 } 2594 2595 /* 2596 * Get the memory size here. Let it be B (bytes). 2597 * Let T be the time in u.s. to scan 64 bytes. 2598 * If we want to complete 1 round of scanning in P seconds. 2599 * 2600 * B * T * 10^(-6) = P 2601 * --------------- 2602 * 64 2603 * 2604 * T = P * 64 * 10^6 2605 * ------------- 2606 * B 2607 * 2608 * = P * 64 * 10^6 2609 * ------------- 2610 * B 2611 * 2612 * The timing bits are set in PTRL_CNTL[28:26] where 2613 * 2614 * 0 - 1 m.s 2615 * 1 - 512 u.s. 2616 * 10 - 256 u.s. 2617 * 11 - 128 u.s. 2618 * 100 - 64 u.s. 2619 * 101 - 32 u.s. 2620 * 110 - 0 u.s. 2621 * 111 - reserved. 2622 * 2623 * 2624 * a[0] = 110, a[1] = 101, ... 
a[6] = 0 2625 * 2626 * cs-status property is int x 7 2627 * 0 - cs# 2628 * 1 - cs-status 2629 * 2 - cs-avail.hi 2630 * 3 - cs-avail.lo 2631 * 4 - dimm-capa.hi 2632 * 5 - dimm-capa.lo 2633 * 6 - #of dimms 2634 */ 2635 2636 if (nbytes > 0) { 2637 int i; 2638 uint64_t ms; 2639 ms = ((uint64_t)mc_scan_period * 64 * 1000000)/nbytes; 2640 mcp->mc_speed = mc_scan_speeds[MC_MAX_SPEEDS - 1].mc_speeds; 2641 for (i = 0; i < MC_MAX_SPEEDS - 1; i++) { 2642 if (ms < mc_scan_speeds[i + 1].mc_period) { 2643 mcp->mc_speed = mc_scan_speeds[i].mc_speeds; 2644 break; 2645 } 2646 } 2647 } else 2648 mcp->mc_speed = 0; 2649 2650 2651 for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) { 2652 struct mc_bank *bankp; 2653 mc_retry_info_t *retry; 2654 uint32_t reg; 2655 int k; 2656 2657 /* 2658 * setup bank 2659 */ 2660 bk = macaddr[i].bank; 2661 bankp = &(mcp->mc_bank[bk]); 2662 bankp->mcb_status = BANK_INSTALLED; 2663 bankp->mcb_reg_base = REGS_PA(macaddr, i); 2664 2665 bankp->mcb_retry_freelist = NULL; 2666 bankp->mcb_retry_pending = NULL; 2667 bankp->mcb_active = NULL; 2668 retry = &bankp->mcb_retry_infos[0]; 2669 for (k = 0; k < MC_RETRY_COUNT; k++, retry++) { 2670 mc_retry_info_put(&bankp->mcb_retry_freelist, retry); 2671 } 2672 2673 reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk)); 2674 bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS); 2675 2676 /* 2677 * check if mirror mode 2678 */ 2679 mirr = LD_MAC_REG(MAC_MIRR(mcp, bk)); 2680 2681 if (mirr & MAC_MIRR_MIRROR_MODE) { 2682 MC_LOG("Mirror -> /LSB%d/B%d\n", mcp->mc_board_num, 2683 bk); 2684 bankp->mcb_status |= BANK_MIRROR_MODE; 2685 mirror_mode = 1; 2686 /* 2687 * The following bit is only used for 2688 * error injection. 
We should clear it 2689 */ 2690 if (mirr & MAC_MIRR_BANK_EXCLUSIVE) 2691 ST_MAC_REG(MAC_MIRR(mcp, bk), 0); 2692 } 2693 2694 /* 2695 * restart if not mirror mode or the other bank 2696 * of the mirror is not running 2697 */ 2698 if (!(mirr & MAC_MIRR_MIRROR_MODE) || 2699 !(mcp->mc_bank[bk^1].mcb_status & BANK_PTRL_RUNNING)) { 2700 MC_LOG("Starting up /LSB%d/B%d\n", mcp->mc_board_num, 2701 bk); 2702 get_ptrl_start_address(mcp, bk, &rsaddr.mi_restartaddr); 2703 rsaddr.mi_valid = 0; 2704 rsaddr.mi_injectrestart = 0; 2705 restart_patrol(mcp, bk, &rsaddr); 2706 } else { 2707 MC_LOG("Not starting up /LSB%d/B%d\n", 2708 mcp->mc_board_num, bk); 2709 } 2710 bankp->mcb_status |= BANK_PTRL_RUNNING; 2711 } 2712 if (len > 0) 2713 kmem_free(macaddr, len); 2714 2715 ret = ndi_prop_update_int(DDI_DEV_T_NONE, mcp->mc_dip, "mirror-mode", 2716 mirror_mode); 2717 if (ret != DDI_PROP_SUCCESS) { 2718 cmn_err(CE_WARN, "Unable to update mirror-mode property"); 2719 } 2720 2721 mcp->mc_dimm_list = mc_get_dimm_list(mcp); 2722 2723 /* 2724 * set interval in HZ. 
2725 */ 2726 mcp->mc_last_error = 0; 2727 2728 /* restart memory patrol checking */ 2729 mcp->mc_status |= MC_POLL_RUNNING; 2730 2731 return (DDI_SUCCESS); 2732 } 2733 2734 int 2735 mc_board_del(mc_opl_t *mcp) 2736 { 2737 int i; 2738 scf_log_t *p; 2739 2740 /* 2741 * cleanup mac state 2742 */ 2743 mutex_enter(&mcp->mc_lock); 2744 if (mcp->mc_status & MC_MEMORYLESS) { 2745 mutex_exit(&mcp->mc_lock); 2746 mutex_destroy(&mcp->mc_lock); 2747 return (DDI_SUCCESS); 2748 } 2749 for (i = 0; i < BANKNUM_PER_SB; i++) { 2750 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2751 mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED; 2752 } 2753 } 2754 2755 /* stop memory patrol checking */ 2756 mcp->mc_status &= ~MC_POLL_RUNNING; 2757 2758 /* just throw away all the scf logs */ 2759 for (i = 0; i < BANKNUM_PER_SB; i++) { 2760 while ((p = mcp->mc_scf_log[i]) != NULL) { 2761 mcp->mc_scf_log[i] = p->sl_next; 2762 mcp->mc_scf_total[i]--; 2763 kmem_free(p, sizeof (scf_log_t)); 2764 } 2765 } 2766 2767 if (mcp->mlist) 2768 mc_memlist_delete(mcp->mlist); 2769 2770 if (mcp->mc_dimm_list) 2771 mc_free_dimm_list(mcp->mc_dimm_list); 2772 2773 mutex_exit(&mcp->mc_lock); 2774 2775 mutex_destroy(&mcp->mc_lock); 2776 return (DDI_SUCCESS); 2777 } 2778 2779 int 2780 mc_suspend(mc_opl_t *mcp, uint32_t flag) 2781 { 2782 /* stop memory patrol checking */ 2783 mutex_enter(&mcp->mc_lock); 2784 if (mcp->mc_status & MC_MEMORYLESS) { 2785 mutex_exit(&mcp->mc_lock); 2786 return (DDI_SUCCESS); 2787 } 2788 2789 mcp->mc_status &= ~MC_POLL_RUNNING; 2790 2791 mcp->mc_status |= flag; 2792 mutex_exit(&mcp->mc_lock); 2793 2794 return (DDI_SUCCESS); 2795 } 2796 2797 void 2798 opl_mc_update_mlist(void) 2799 { 2800 int i; 2801 mc_opl_t *mcp; 2802 2803 /* 2804 * memory information is not updated until 2805 * the post attach/detach stage during DR. 2806 * This interface is used by dr_mem to inform 2807 * mc-opl to update the mlist. 
2808 */ 2809 2810 mutex_enter(&mcmutex); 2811 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2812 if ((mcp = mc_instances[i]) == NULL) 2813 continue; 2814 mutex_enter(&mcp->mc_lock); 2815 if (mcp->mlist) 2816 mc_memlist_delete(mcp->mlist); 2817 mcp->mlist = NULL; 2818 mc_get_mlist(mcp); 2819 mutex_exit(&mcp->mc_lock); 2820 } 2821 mutex_exit(&mcmutex); 2822 } 2823 2824 /* caller must clear the SUSPEND bits or this will do nothing */ 2825 2826 int 2827 mc_resume(mc_opl_t *mcp, uint32_t flag) 2828 { 2829 int i; 2830 uint64_t basepa; 2831 2832 mutex_enter(&mcp->mc_lock); 2833 if (mcp->mc_status & MC_MEMORYLESS) { 2834 mutex_exit(&mcp->mc_lock); 2835 return (DDI_SUCCESS); 2836 } 2837 basepa = mcp->mc_start_address; 2838 if (get_base_address(mcp) == DDI_FAILURE) { 2839 mutex_exit(&mcp->mc_lock); 2840 return (DDI_FAILURE); 2841 } 2842 2843 if (basepa != mcp->mc_start_address) { 2844 if (mcp->mlist) 2845 mc_memlist_delete(mcp->mlist); 2846 mcp->mlist = NULL; 2847 mc_get_mlist(mcp); 2848 } 2849 2850 mcp->mc_status &= ~flag; 2851 2852 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 2853 mutex_exit(&mcp->mc_lock); 2854 return (DDI_SUCCESS); 2855 } 2856 2857 if (!(mcp->mc_status & MC_POLL_RUNNING)) { 2858 /* restart memory patrol checking */ 2859 mcp->mc_status |= MC_POLL_RUNNING; 2860 for (i = 0; i < BANKNUM_PER_SB; i++) { 2861 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2862 mc_check_errors_func(mcp); 2863 } 2864 } 2865 } 2866 mutex_exit(&mcp->mc_lock); 2867 2868 return (DDI_SUCCESS); 2869 } 2870 2871 static mc_opl_t * 2872 mc_pa_to_mcp(uint64_t pa) 2873 { 2874 mc_opl_t *mcp; 2875 int i; 2876 2877 ASSERT(MUTEX_HELD(&mcmutex)); 2878 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2879 if ((mcp = mc_instances[i]) == NULL) 2880 continue; 2881 /* if mac patrol is suspended, we cannot rely on it */ 2882 if (!(mcp->mc_status & MC_POLL_RUNNING) || 2883 (mcp->mc_status & MC_SOFT_SUSPENDED)) 2884 continue; 2885 if (mc_rangecheck_pa(mcp, pa)) { 2886 return (mcp); 2887 } 2888 } 
2889 return (NULL); 2890 } 2891 2892 /* 2893 * Get Physical Board number from Logical one. 2894 */ 2895 static int 2896 mc_opl_get_physical_board(int sb) 2897 { 2898 if (&opl_get_physical_board) { 2899 return (opl_get_physical_board(sb)); 2900 } 2901 2902 cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n"); 2903 return (-1); 2904 } 2905 2906 /* ARGSUSED */ 2907 int 2908 mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen, 2909 int *lenp) 2910 { 2911 int i; 2912 int j; 2913 int sb; 2914 int bank; 2915 int cs; 2916 mc_opl_t *mcp; 2917 char memb_num; 2918 2919 mutex_enter(&mcmutex); 2920 2921 if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) || 2922 (!pa_is_valid(mcp, flt_addr))) { 2923 mutex_exit(&mcmutex); 2924 if (snprintf(buf, buflen, "UNKNOWN") >= buflen) { 2925 return (ENOSPC); 2926 } else { 2927 if (lenp) 2928 *lenp = strlen(buf); 2929 } 2930 return (0); 2931 } 2932 2933 bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address); 2934 sb = mcp->mc_phys_board_num; 2935 cs = pa_to_cs(mcp, flt_addr - mcp->mc_start_address); 2936 2937 if (sb == -1) { 2938 mutex_exit(&mcmutex); 2939 return (ENXIO); 2940 } 2941 2942 if (plat_model == MODEL_DC) { 2943 i = BD_BK_SLOT_TO_INDEX(0, bank, 0); 2944 j = (cs == 0) ? i : i + 2; 2945 snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s", 2946 model_names[plat_model].unit_name, sb, 2947 mc_dc_dimm_unum_table[j], 2948 mc_dc_dimm_unum_table[j + 1]); 2949 } else { 2950 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 2951 j = (cs == 0) ? 
i : i + 2; 2952 memb_num = mc_ff_dimm_unum_table[i][0]; 2953 snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s", 2954 model_names[plat_model].unit_name, 2955 model_names[plat_model].mem_name, memb_num, 2956 &mc_ff_dimm_unum_table[j][1], 2957 &mc_ff_dimm_unum_table[j + 1][1]); 2958 } 2959 if (lenp) { 2960 *lenp = strlen(buf); 2961 } 2962 mutex_exit(&mcmutex); 2963 return (0); 2964 } 2965 2966 int 2967 opl_mc_suspend(void) 2968 { 2969 mc_opl_t *mcp; 2970 int i; 2971 2972 mutex_enter(&mcmutex); 2973 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2974 if ((mcp = mc_instances[i]) == NULL) 2975 continue; 2976 mc_suspend(mcp, MC_SOFT_SUSPENDED); 2977 } 2978 mutex_exit(&mcmutex); 2979 2980 return (0); 2981 } 2982 2983 int 2984 opl_mc_resume(void) 2985 { 2986 mc_opl_t *mcp; 2987 int i; 2988 2989 mutex_enter(&mcmutex); 2990 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2991 if ((mcp = mc_instances[i]) == NULL) 2992 continue; 2993 mc_resume(mcp, MC_SOFT_SUSPENDED); 2994 } 2995 mutex_exit(&mcmutex); 2996 2997 return (0); 2998 } 2999 static void 3000 insert_mcp(mc_opl_t *mcp) 3001 { 3002 mutex_enter(&mcmutex); 3003 if (mc_instances[mcp->mc_board_num] != NULL) { 3004 MC_LOG("mc-opl instance for board# %d already exists\n", 3005 mcp->mc_board_num); 3006 } 3007 mc_instances[mcp->mc_board_num] = mcp; 3008 mutex_exit(&mcmutex); 3009 } 3010 3011 static void 3012 delete_mcp(mc_opl_t *mcp) 3013 { 3014 mutex_enter(&mcmutex); 3015 mc_instances[mcp->mc_board_num] = 0; 3016 mutex_exit(&mcmutex); 3017 } 3018 3019 /* Error injection interface */ 3020 3021 static void 3022 mc_lock_va(uint64_t pa, caddr_t new_va) 3023 { 3024 tte_t tte; 3025 3026 vtag_flushpage(new_va, (uint64_t)ksfmmup); 3027 sfmmu_memtte(&tte, pa >> PAGESHIFT, PROC_DATA|HAT_NOSYNC, TTE8K); 3028 tte.tte_intlo |= TTE_LCK_INT; 3029 sfmmu_dtlb_ld_kva(new_va, &tte); 3030 } 3031 3032 static void 3033 mc_unlock_va(caddr_t va) 3034 { 3035 vtag_flushpage(va, (uint64_t)ksfmmup); 3036 } 3037 3038 /* ARGSUSED */ 3039 int 3040 mc_inject_error(int error_type, 
uint64_t pa, uint32_t flags) 3041 { 3042 mc_opl_t *mcp; 3043 int bank; 3044 uint32_t dimm_addr; 3045 uint32_t cntl; 3046 mc_rsaddr_info_t rsaddr; 3047 uint32_t data, stat; 3048 int both_sides = 0; 3049 uint64_t pa0; 3050 int extra_injection_needed = 0; 3051 extern void cpu_flush_ecache(void); 3052 3053 MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags); 3054 3055 mutex_enter(&mcmutex); 3056 if ((mcp = mc_pa_to_mcp(pa)) == NULL) { 3057 mutex_exit(&mcmutex); 3058 MC_LOG("mc_inject_error: invalid pa\n"); 3059 return (ENOTSUP); 3060 } 3061 3062 mutex_enter(&mcp->mc_lock); 3063 mutex_exit(&mcmutex); 3064 3065 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 3066 mutex_exit(&mcp->mc_lock); 3067 MC_LOG("mc-opl has been suspended. No error injection.\n"); 3068 return (EBUSY); 3069 } 3070 3071 /* convert pa to offset within the board */ 3072 MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address); 3073 3074 if (!pa_is_valid(mcp, pa)) { 3075 mutex_exit(&mcp->mc_lock); 3076 return (EINVAL); 3077 } 3078 3079 pa0 = pa - mcp->mc_start_address; 3080 3081 bank = pa_to_bank(mcp, pa0); 3082 3083 if (flags & MC_INJECT_FLAG_OTHER) 3084 bank = bank ^ 1; 3085 3086 if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) { 3087 mutex_exit(&mcp->mc_lock); 3088 MC_LOG("Not mirror mode\n"); 3089 return (EINVAL); 3090 } 3091 3092 dimm_addr = pa_to_dimm(mcp, pa0); 3093 3094 MC_LOG("injecting error to /LSB%d/B%d/%x\n", mcp->mc_board_num, bank, 3095 dimm_addr); 3096 3097 3098 switch (error_type) { 3099 case MC_INJECT_INTERMITTENT_MCE: 3100 case MC_INJECT_PERMANENT_MCE: 3101 case MC_INJECT_MUE: 3102 both_sides = 1; 3103 } 3104 3105 if (flags & MC_INJECT_FLAG_RESET) 3106 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0); 3107 3108 ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK); 3109 3110 if (both_sides) { 3111 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0); 3112 ST_MAC_REG(MAC_EG_ADD(mcp, bank^1), dimm_addr & 3113 MAC_EG_ADD_MASK); 3114 } 3115 3116 switch 
(error_type) { 3117 case MC_INJECT_SUE: 3118 extra_injection_needed = 1; 3119 /*FALLTHROUGH*/ 3120 case MC_INJECT_UE: 3121 case MC_INJECT_MUE: 3122 if (flags & MC_INJECT_FLAG_PATH) { 3123 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 | 3124 MAC_EG_FORCE_READ16 | MAC_EG_RDERR_ONCE; 3125 } else { 3126 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR00 | 3127 MAC_EG_FORCE_DERR16 | MAC_EG_DERR_ONCE; 3128 } 3129 flags |= MC_INJECT_FLAG_ST; 3130 break; 3131 case MC_INJECT_INTERMITTENT_CE: 3132 case MC_INJECT_INTERMITTENT_MCE: 3133 if (flags & MC_INJECT_FLAG_PATH) { 3134 cntl = MAC_EG_ADD_FIX |MAC_EG_FORCE_READ00 | 3135 MAC_EG_RDERR_ONCE; 3136 } else { 3137 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 | 3138 MAC_EG_DERR_ONCE; 3139 } 3140 extra_injection_needed = 1; 3141 flags |= MC_INJECT_FLAG_ST; 3142 break; 3143 case MC_INJECT_PERMANENT_CE: 3144 case MC_INJECT_PERMANENT_MCE: 3145 if (flags & MC_INJECT_FLAG_PATH) { 3146 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 | 3147 MAC_EG_RDERR_ALWAYS; 3148 } else { 3149 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 | 3150 MAC_EG_DERR_ALWAYS; 3151 } 3152 flags |= MC_INJECT_FLAG_ST; 3153 break; 3154 case MC_INJECT_CMPE: 3155 data = 0xabcdefab; 3156 stphys(pa, data); 3157 cpu_flush_ecache(); 3158 MC_LOG("CMPE: writing data %x to %lx\n", data, pa); 3159 ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE); 3160 stphys(pa, data ^ 0xffffffff); 3161 membar_sync(); 3162 cpu_flush_ecache(); 3163 ST_MAC_REG(MAC_MIRR(mcp, bank), 0); 3164 MC_LOG("CMPE: write new data %xto %lx\n", data, pa); 3165 cntl = 0; 3166 break; 3167 case MC_INJECT_NOP: 3168 cntl = 0; 3169 break; 3170 default: 3171 MC_LOG("mc_inject_error: invalid option\n"); 3172 cntl = 0; 3173 } 3174 3175 if (cntl) { 3176 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK); 3177 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 3178 3179 if (both_sides) { 3180 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 3181 MAC_EG_SETUP_MASK); 3182 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 3183 } 3184 } 
3185 3186 /* 3187 * For all injection cases except compare error, we 3188 * must write to the PA to trigger the error. 3189 */ 3190 3191 if (flags & MC_INJECT_FLAG_ST) { 3192 data = 0xf0e0d0c0; 3193 MC_LOG("Writing %x to %lx\n", data, pa); 3194 stphys(pa, data); 3195 cpu_flush_ecache(); 3196 } 3197 3198 3199 if (flags & MC_INJECT_FLAG_LD) { 3200 if (flags & MC_INJECT_FLAG_PREFETCH) { 3201 /* 3202 * Use strong prefetch operation to 3203 * inject MI errors. 3204 */ 3205 page_t *pp; 3206 extern void mc_prefetch(caddr_t); 3207 3208 MC_LOG("prefetch\n"); 3209 3210 pp = page_numtopp_nolock(pa >> PAGESHIFT); 3211 if (pp != NULL) { 3212 caddr_t va, va1; 3213 3214 va = ppmapin(pp, PROT_READ|PROT_WRITE, 3215 (caddr_t)-1); 3216 kpreempt_disable(); 3217 mc_lock_va((uint64_t)pa, va); 3218 va1 = va + (pa & (PAGESIZE - 1)); 3219 mc_prefetch(va1); 3220 mc_unlock_va(va); 3221 kpreempt_enable(); 3222 ppmapout(va); 3223 3224 /* 3225 * For MI errors, we need one extra 3226 * injection for HW patrol to stop. 3227 */ 3228 extra_injection_needed = 1; 3229 } else { 3230 cmn_err(CE_WARN, "Cannot find page structure" 3231 " for PA %lx\n", pa); 3232 } 3233 } else { 3234 MC_LOG("Reading from %lx\n", pa); 3235 data = ldphys(pa); 3236 MC_LOG("data = %x\n", data); 3237 } 3238 3239 if (extra_injection_needed) { 3240 /* 3241 * These are the injection cases where the 3242 * requested injected errors will not cause the HW 3243 * patrol to stop. For these cases, we need to inject 3244 * an extra 'real' PTRL error to force the 3245 * HW patrol to stop so that we can report the 3246 * errors injected. Note that we cannot read 3247 * and report error status while the HW patrol 3248 * is running. 
3249 */ 3250 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 3251 cntl & MAC_EG_SETUP_MASK); 3252 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 3253 3254 if (both_sides) { 3255 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 3256 MAC_EG_SETUP_MASK); 3257 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 3258 } 3259 data = 0xf0e0d0c0; 3260 MC_LOG("Writing %x to %lx\n", data, pa); 3261 stphys(pa, data); 3262 cpu_flush_ecache(); 3263 } 3264 } 3265 3266 if (flags & MC_INJECT_FLAG_RESTART) { 3267 MC_LOG("Restart patrol\n"); 3268 rsaddr.mi_restartaddr.ma_bd = mcp->mc_board_num; 3269 rsaddr.mi_restartaddr.ma_bank = bank; 3270 rsaddr.mi_restartaddr.ma_dimm_addr = dimm_addr; 3271 rsaddr.mi_valid = 1; 3272 rsaddr.mi_injectrestart = 1; 3273 restart_patrol(mcp, bank, &rsaddr); 3274 } 3275 3276 if (flags & MC_INJECT_FLAG_POLL) { 3277 int running; 3278 int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank; 3279 3280 MC_LOG("Poll patrol error\n"); 3281 stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank)); 3282 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 3283 running = cntl & MAC_CNTL_PTRL_START; 3284 3285 if (!running && 3286 (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS))) { 3287 /* 3288 * HW patrol stopped and we have errors to 3289 * report. Do it. 3290 */ 3291 mcp->mc_speedup_period[ebank] = 0; 3292 rsaddr.mi_valid = 0; 3293 rsaddr.mi_injectrestart = 0; 3294 if (IS_MIRROR(mcp, bank)) { 3295 mc_error_handler_mir(mcp, bank, &rsaddr); 3296 } else { 3297 mc_error_handler(mcp, bank, &rsaddr); 3298 } 3299 3300 restart_patrol(mcp, bank, &rsaddr); 3301 } else { 3302 /* 3303 * We are expecting to report injected 3304 * errors but the HW patrol is still running. 
3305 * Speed up the scanning 3306 */ 3307 mcp->mc_speedup_period[ebank] = 2; 3308 MAC_CMD(mcp, bank, 0); 3309 restart_patrol(mcp, bank, NULL); 3310 } 3311 } 3312 3313 mutex_exit(&mcp->mc_lock); 3314 return (0); 3315 } 3316 3317 void 3318 mc_stphysio(uint64_t pa, uint32_t data) 3319 { 3320 MC_LOG("0x%x -> pa(%lx)\n", data, pa); 3321 stphysio(pa, data); 3322 3323 /* force the above write to be processed by mac patrol */ 3324 data = ldphysio(pa); 3325 MC_LOG("pa(%lx) = 0x%x\n", pa, data); 3326 } 3327 3328 uint32_t 3329 mc_ldphysio(uint64_t pa) 3330 { 3331 uint32_t rv; 3332 3333 rv = ldphysio(pa); 3334 MC_LOG("pa(%lx) = 0x%x\n", pa, rv); 3335 return (rv); 3336 } 3337 3338 #define isdigit(ch) ((ch) >= '0' && (ch) <= '9') 3339 3340 /* 3341 * parse_unum_memory -- extract the board number and the DIMM name from 3342 * the unum. 3343 * 3344 * Return 0 for success and non-zero for a failure. 3345 */ 3346 int 3347 parse_unum_memory(char *unum, int *board, char *dname) 3348 { 3349 char *c; 3350 char x, y, z; 3351 3352 if ((c = strstr(unum, "CMU")) != NULL) { 3353 /* DC Model */ 3354 c += 3; 3355 *board = (uint8_t)stoi(&c); 3356 if ((c = strstr(c, "MEM")) == NULL) { 3357 return (1); 3358 } 3359 c += 3; 3360 if (strlen(c) < 3) { 3361 return (2); 3362 } 3363 if ((!isdigit(c[0])) || (!(isdigit(c[1]))) || 3364 ((c[2] != 'A') && (c[2] != 'B'))) { 3365 return (3); 3366 } 3367 x = c[0]; 3368 y = c[1]; 3369 z = c[2]; 3370 } else if ((c = strstr(unum, "MBU_")) != NULL) { 3371 /* FF1/FF2 Model */ 3372 c += 4; 3373 if ((c[0] != 'A') && (c[0] != 'B')) { 3374 return (4); 3375 } 3376 if ((c = strstr(c, "MEMB")) == NULL) { 3377 return (5); 3378 } 3379 c += 4; 3380 3381 x = c[0]; 3382 *board = ((uint8_t)stoi(&c)) / 4; 3383 if ((c = strstr(c, "MEM")) == NULL) { 3384 return (6); 3385 } 3386 c += 3; 3387 if (strlen(c) < 2) { 3388 return (7); 3389 } 3390 if ((!isdigit(c[0])) || ((c[1] != 'A') && (c[1] != 'B'))) { 3391 return (8); 3392 } 3393 y = c[0]; 3394 z = c[1]; 3395 } else { 3396 return (9); 
3397 } 3398 if (*board < 0) { 3399 return (10); 3400 } 3401 dname[0] = x; 3402 dname[1] = y; 3403 dname[2] = z; 3404 dname[3] = '\0'; 3405 return (0); 3406 } 3407 3408 /* 3409 * mc_get_mem_sid_dimm -- Get the serial-ID for a given board and 3410 * the DIMM name. 3411 */ 3412 int 3413 mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf, 3414 int buflen, int *lenp) 3415 { 3416 int ret = ENODEV; 3417 mc_dimm_info_t *d = NULL; 3418 3419 if ((d = mcp->mc_dimm_list) == NULL) 3420 return (ENOTSUP); 3421 3422 for (; d != NULL; d = d->md_next) { 3423 if (strcmp(d->md_dimmname, dname) == 0) { 3424 break; 3425 } 3426 } 3427 if (d != NULL) { 3428 *lenp = strlen(d->md_serial) + strlen(d->md_partnum); 3429 if (buflen <= *lenp) { 3430 cmn_err(CE_WARN, "mc_get_mem_sid_dimm: " 3431 "buflen is smaller than %d\n", *lenp); 3432 ret = ENOSPC; 3433 } else { 3434 snprintf(buf, buflen, "%s:%s", 3435 d->md_serial, d->md_partnum); 3436 ret = 0; 3437 } 3438 } 3439 MC_LOG("mc_get_mem_sid_dimm: Ret=%d Name=%s Serial-ID=%s\n", 3440 ret, dname, (ret == 0) ? buf : ""); 3441 return (ret); 3442 } 3443 3444 int 3445 mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int sb, 3446 int bank, uint32_t mf_type, uint32_t d_slot) 3447 { 3448 int lenp = buflen; 3449 int id; 3450 int ret; 3451 char *dimmnm; 3452 3453 if (mf_type == FLT_TYPE_INTERMITTENT_CE || 3454 mf_type == FLT_TYPE_PERMANENT_CE) { 3455 if (plat_model == MODEL_DC) { 3456 id = BD_BK_SLOT_TO_INDEX(0, bank, d_slot); 3457 dimmnm = mc_dc_dimm_unum_table[id]; 3458 } else { 3459 id = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 3460 dimmnm = mc_ff_dimm_unum_table[id]; 3461 } 3462 if ((ret = mc_get_mem_sid_dimm(mcp, dimmnm, buf, buflen, 3463 &lenp)) != 0) { 3464 return (ret); 3465 } 3466 } else { 3467 return (1); 3468 } 3469 3470 return (0); 3471 } 3472 3473 /* 3474 * mc_get_mem_sid -- get the DIMM serial-ID corresponding to the unum. 
3475 */ 3476 int 3477 mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 3478 { 3479 int i; 3480 int ret = ENODEV; 3481 int board; 3482 char dname[MCOPL_MAX_DIMMNAME + 1]; 3483 mc_opl_t *mcp; 3484 3485 MC_LOG("mc_get_mem_sid: unum=%s buflen=%d\n", unum, buflen); 3486 if ((ret = parse_unum_memory(unum, &board, dname)) != 0) { 3487 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3488 unum, ret); 3489 return (EINVAL); 3490 } 3491 3492 if (board < 0) { 3493 MC_LOG("mc_get_mem_sid: Invalid board=%d dimm=%s\n", 3494 board, dname); 3495 return (EINVAL); 3496 } 3497 3498 mutex_enter(&mcmutex); 3499 /* 3500 * return ENOENT if we can not find the matching board. 3501 */ 3502 ret = ENOENT; 3503 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3504 if ((mcp = mc_instances[i]) == NULL) 3505 continue; 3506 mutex_enter(&mcp->mc_lock); 3507 if (mcp->mc_phys_board_num != board) { 3508 mutex_exit(&mcp->mc_lock); 3509 continue; 3510 } 3511 ret = mc_get_mem_sid_dimm(mcp, dname, buf, buflen, lenp); 3512 if (ret == 0) { 3513 mutex_exit(&mcp->mc_lock); 3514 break; 3515 } 3516 mutex_exit(&mcp->mc_lock); 3517 } 3518 mutex_exit(&mcmutex); 3519 return (ret); 3520 } 3521 3522 /* 3523 * mc_get_mem_offset -- get the offset in a DIMM for a given physical address. 
3524 */ 3525 int 3526 mc_get_mem_offset(uint64_t paddr, uint64_t *offp) 3527 { 3528 int i; 3529 int ret = ENODEV; 3530 mc_addr_t maddr; 3531 mc_opl_t *mcp; 3532 3533 mutex_enter(&mcmutex); 3534 for (i = 0; ((i < OPL_MAX_BOARDS) && (ret != 0)); i++) { 3535 if ((mcp = mc_instances[i]) == NULL) 3536 continue; 3537 mutex_enter(&mcp->mc_lock); 3538 if (!pa_is_valid(mcp, paddr)) { 3539 mutex_exit(&mcp->mc_lock); 3540 continue; 3541 } 3542 if (pa_to_maddr(mcp, paddr, &maddr) == 0) { 3543 *offp = maddr.ma_dimm_addr; 3544 ret = 0; 3545 } 3546 mutex_exit(&mcp->mc_lock); 3547 } 3548 mutex_exit(&mcmutex); 3549 MC_LOG("mc_get_mem_offset: Ret=%d paddr=0x%lx offset=0x%lx\n", 3550 ret, paddr, *offp); 3551 return (ret); 3552 } 3553 3554 /* 3555 * dname_to_bankslot - Get the bank and slot number from the DIMM name. 3556 */ 3557 int 3558 dname_to_bankslot(char *dname, int *bank, int *slot) 3559 { 3560 int i; 3561 int tsz; 3562 char **tbl; 3563 3564 if (plat_model == MODEL_DC) { /* DC */ 3565 tbl = mc_dc_dimm_unum_table; 3566 tsz = OPL_MAX_DIMMS; 3567 } else { 3568 tbl = mc_ff_dimm_unum_table; 3569 tsz = 2 * OPL_MAX_DIMMS; 3570 } 3571 3572 for (i = 0; i < tsz; i++) { 3573 if (strcmp(dname, tbl[i]) == 0) { 3574 break; 3575 } 3576 } 3577 if (i == tsz) { 3578 return (1); 3579 } 3580 *bank = INDEX_TO_BANK(i); 3581 *slot = INDEX_TO_SLOT(i); 3582 return (0); 3583 } 3584 3585 /* 3586 * mc_get_mem_addr -- get the physical address of a DIMM corresponding 3587 * to the unum and sid. 
3588 */ 3589 int 3590 mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr) 3591 { 3592 int board; 3593 int bank; 3594 int slot; 3595 int i; 3596 int ret = ENODEV; 3597 char dname[MCOPL_MAX_DIMMNAME + 1]; 3598 mc_addr_t maddr; 3599 mc_opl_t *mcp; 3600 3601 MC_LOG("mc_get_mem_addr: unum=%s sid=%s offset=0x%lx\n", 3602 unum, sid, offset); 3603 if (parse_unum_memory(unum, &board, dname) != 0) { 3604 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3605 unum, ret); 3606 return (EINVAL); 3607 } 3608 3609 if (board < 0) { 3610 MC_LOG("mc_get_mem_addr: Invalid board=%d dimm=%s\n", 3611 board, dname); 3612 return (EINVAL); 3613 } 3614 3615 mutex_enter(&mcmutex); 3616 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3617 if ((mcp = mc_instances[i]) == NULL) 3618 continue; 3619 mutex_enter(&mcp->mc_lock); 3620 if (mcp->mc_phys_board_num != board) { 3621 mutex_exit(&mcp->mc_lock); 3622 continue; 3623 } 3624 3625 ret = dname_to_bankslot(dname, &bank, &slot); 3626 MC_LOG("mc_get_mem_addr: bank=%d slot=%d\n", bank, slot); 3627 if (ret != 0) { 3628 MC_LOG("mc_get_mem_addr: dname_to_bankslot failed\n"); 3629 ret = ENODEV; 3630 } else { 3631 maddr.ma_bd = mcp->mc_board_num; 3632 maddr.ma_bank = bank; 3633 maddr.ma_dimm_addr = offset; 3634 ret = mcaddr_to_pa(mcp, &maddr, paddr); 3635 if (ret != 0) { 3636 MC_LOG("mc_get_mem_addr: " 3637 "mcaddr_to_pa failed\n"); 3638 ret = ENODEV; 3639 } 3640 mutex_exit(&mcp->mc_lock); 3641 break; 3642 } 3643 mutex_exit(&mcp->mc_lock); 3644 } 3645 mutex_exit(&mcmutex); 3646 MC_LOG("mc_get_mem_addr: Ret=%d, Paddr=0x%lx\n", ret, *paddr); 3647 return (ret); 3648 } 3649 3650 static void 3651 mc_free_dimm_list(mc_dimm_info_t *d) 3652 { 3653 mc_dimm_info_t *next; 3654 3655 while (d != NULL) { 3656 next = d->md_next; 3657 kmem_free(d, sizeof (mc_dimm_info_t)); 3658 d = next; 3659 } 3660 } 3661 3662 /* 3663 * mc_get_dimm_list -- get the list of dimms with serial-id info 3664 * from the SP. 
3665 */ 3666 mc_dimm_info_t * 3667 mc_get_dimm_list(mc_opl_t *mcp) 3668 { 3669 uint32_t bufsz; 3670 uint32_t maxbufsz; 3671 int ret; 3672 int sexp; 3673 board_dimm_info_t *bd_dimmp; 3674 mc_dimm_info_t *dimm_list = NULL; 3675 3676 maxbufsz = bufsz = sizeof (board_dimm_info_t) + 3677 ((MCOPL_MAX_DIMMNAME + MCOPL_MAX_SERIAL + 3678 MCOPL_MAX_PARTNUM) * OPL_MAX_DIMMS); 3679 3680 bd_dimmp = (board_dimm_info_t *)kmem_alloc(bufsz, KM_SLEEP); 3681 ret = scf_get_dimminfo(mcp->mc_board_num, (void *)bd_dimmp, &bufsz); 3682 3683 MC_LOG("mc_get_dimm_list: scf_service_getinfo returned=%d\n", ret); 3684 if (ret == 0) { 3685 sexp = sizeof (board_dimm_info_t) + 3686 ((bd_dimmp->bd_dnamesz + bd_dimmp->bd_serialsz + 3687 bd_dimmp->bd_partnumsz) * bd_dimmp->bd_numdimms); 3688 3689 if ((bd_dimmp->bd_version == OPL_DIMM_INFO_VERSION) && 3690 (bd_dimmp->bd_dnamesz <= MCOPL_MAX_DIMMNAME) && 3691 (bd_dimmp->bd_serialsz <= MCOPL_MAX_SERIAL) && 3692 (bd_dimmp->bd_partnumsz <= MCOPL_MAX_PARTNUM) && 3693 (sexp <= bufsz)) { 3694 3695 #ifdef DEBUG 3696 if (oplmc_debug) 3697 mc_dump_dimm_info(bd_dimmp); 3698 #endif 3699 dimm_list = mc_prepare_dimmlist(bd_dimmp); 3700 3701 } else { 3702 cmn_err(CE_WARN, "DIMM info version mismatch\n"); 3703 } 3704 } 3705 kmem_free(bd_dimmp, maxbufsz); 3706 MC_LOG("mc_get_dimm_list: dimmlist=0x%p\n", dimm_list); 3707 return (dimm_list); 3708 } 3709 3710 /* 3711 * mc_prepare_dimmlist - Prepare the dimm list from the information 3712 * received from the SP. 
3713 */ 3714 mc_dimm_info_t * 3715 mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp) 3716 { 3717 char *dimm_name; 3718 char *serial; 3719 char *part; 3720 int dimm; 3721 int dnamesz = bd_dimmp->bd_dnamesz; 3722 int sersz = bd_dimmp->bd_serialsz; 3723 int partsz = bd_dimmp->bd_partnumsz; 3724 mc_dimm_info_t *dimm_list = NULL; 3725 mc_dimm_info_t *d; 3726 3727 dimm_name = (char *)(bd_dimmp + 1); 3728 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3729 3730 d = (mc_dimm_info_t *)kmem_alloc(sizeof (mc_dimm_info_t), 3731 KM_SLEEP); 3732 3733 bcopy(dimm_name, d->md_dimmname, dnamesz); 3734 d->md_dimmname[dnamesz] = 0; 3735 3736 serial = dimm_name + dnamesz; 3737 bcopy(serial, d->md_serial, sersz); 3738 d->md_serial[sersz] = 0; 3739 3740 part = serial + sersz; 3741 bcopy(part, d->md_partnum, partsz); 3742 d->md_partnum[partsz] = 0; 3743 3744 d->md_next = dimm_list; 3745 dimm_list = d; 3746 dimm_name = part + partsz; 3747 } 3748 return (dimm_list); 3749 } 3750 3751 #ifdef DEBUG 3752 void 3753 mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz) 3754 { 3755 char dname[MCOPL_MAX_DIMMNAME + 1]; 3756 char serial[MCOPL_MAX_SERIAL + 1]; 3757 char part[ MCOPL_MAX_PARTNUM + 1]; 3758 char *b; 3759 3760 b = buf; 3761 bcopy(b, dname, dnamesz); 3762 dname[dnamesz] = 0; 3763 3764 b += dnamesz; 3765 bcopy(b, serial, serialsz); 3766 serial[serialsz] = 0; 3767 3768 b += serialsz; 3769 bcopy(b, part, partnumsz); 3770 part[partnumsz] = 0; 3771 3772 printf("DIMM=%s Serial=%s PartNum=%s\n", dname, serial, part); 3773 } 3774 3775 void 3776 mc_dump_dimm_info(board_dimm_info_t *bd_dimmp) 3777 { 3778 int dimm; 3779 int dnamesz = bd_dimmp->bd_dnamesz; 3780 int sersz = bd_dimmp->bd_serialsz; 3781 int partsz = bd_dimmp->bd_partnumsz; 3782 char *buf; 3783 3784 printf("Version=%d Board=%02d DIMMs=%d NameSize=%d " 3785 "SerialSize=%d PartnumSize=%d\n", bd_dimmp->bd_version, 3786 bd_dimmp->bd_boardnum, bd_dimmp->bd_numdimms, bd_dimmp->bd_dnamesz, 3787 bd_dimmp->bd_serialsz, 
bd_dimmp->bd_partnumsz); 3788 printf("======================================================\n"); 3789 3790 buf = (char *)(bd_dimmp + 1); 3791 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3792 mc_dump_dimm(buf, dnamesz, sersz, partsz); 3793 buf += dnamesz + sersz + partsz; 3794 } 3795 printf("======================================================\n"); 3796 } 3797 3798 3799 /* ARGSUSED */ 3800 static int 3801 mc_ioctl_debug(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 3802 int *rvalp) 3803 { 3804 caddr_t buf; 3805 uint64_t pa; 3806 int rv = 0; 3807 int i; 3808 uint32_t flags; 3809 static uint32_t offset = 0; 3810 3811 3812 flags = (cmd >> 4) & 0xfffffff; 3813 3814 cmd &= 0xf; 3815 3816 MC_LOG("mc_ioctl(cmd = %x, flags = %x)\n", cmd, flags); 3817 3818 if (arg != NULL) { 3819 if (ddi_copyin((const void *)arg, (void *)&pa, 3820 sizeof (uint64_t), 0) < 0) { 3821 rv = EFAULT; 3822 return (rv); 3823 } 3824 buf = NULL; 3825 } else { 3826 buf = (caddr_t)kmem_alloc(PAGESIZE, KM_SLEEP); 3827 3828 pa = va_to_pa(buf); 3829 pa += offset; 3830 3831 offset += 64; 3832 if (offset >= PAGESIZE) 3833 offset = 0; 3834 } 3835 3836 switch (cmd) { 3837 case MCI_CE: 3838 mc_inject_error(MC_INJECT_INTERMITTENT_CE, pa, flags); 3839 break; 3840 case MCI_PERM_CE: 3841 mc_inject_error(MC_INJECT_PERMANENT_CE, pa, flags); 3842 break; 3843 case MCI_UE: 3844 mc_inject_error(MC_INJECT_UE, pa, flags); 3845 break; 3846 case MCI_M_CE: 3847 mc_inject_error(MC_INJECT_INTERMITTENT_MCE, pa, flags); 3848 break; 3849 case MCI_M_PCE: 3850 mc_inject_error(MC_INJECT_PERMANENT_MCE, pa, flags); 3851 break; 3852 case MCI_M_UE: 3853 mc_inject_error(MC_INJECT_MUE, pa, flags); 3854 break; 3855 case MCI_CMP: 3856 mc_inject_error(MC_INJECT_CMPE, pa, flags); 3857 break; 3858 case MCI_NOP: 3859 mc_inject_error(MC_INJECT_NOP, pa, flags); break; 3860 case MCI_SHOW_ALL: 3861 mc_debug_show_all = 1; 3862 break; 3863 case MCI_SHOW_NONE: 3864 mc_debug_show_all = 0; 3865 break; 3866 case MCI_ALLOC: 3867 
/* 3868 * just allocate some kernel memory and never free it 3869 * 512 MB seems to be the maximum size supported. 3870 */ 3871 cmn_err(CE_NOTE, "Allocating kmem %d MB\n", flags * 512); 3872 for (i = 0; i < flags; i++) { 3873 buf = kmem_alloc(512 * 1024 * 1024, KM_SLEEP); 3874 cmn_err(CE_NOTE, "kmem buf %llx PA %llx\n", 3875 (u_longlong_t)buf, (u_longlong_t)va_to_pa(buf)); 3876 } 3877 break; 3878 case MCI_SUSPEND: 3879 (void) opl_mc_suspend(); 3880 break; 3881 case MCI_RESUME: 3882 (void) opl_mc_resume(); 3883 break; 3884 default: 3885 rv = ENXIO; 3886 } 3887 return (rv); 3888 } 3889 3890 #endif /* DEBUG */ 3891