/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2007
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/async.h>
#include <sys/machcpuvar.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/ksynch.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/kmem.h>
#include <sys/fm/io/opl_mc_fm.h>
#include <sys/memlist.h>
#include <sys/param.h>
#include <sys/disp.h>
#include <vm/page.h>
#include <sys/mc-opl.h>
#include <sys/opl.h>
#include <sys/opl_dimm.h>
#include <sys/scfd/scfostoescf.h>
#include <sys/cpu_module.h>
#include <vm/seg_kmem.h>
#include <sys/vmem.h>
#include <vm/hat_sfmmu.h>
#include <sys/vmsystm.h>
#include <sys/membar.h>

/*
 * Function prototypes
 */
static int mc_open(dev_t *, int, int, cred_t *);
static int mc_close(dev_t, int, int, cred_t *);
static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
static int mc_detach(dev_info_t *, ddi_detach_cmd_t);

static int mc_poll_init(void);
static void mc_poll_fini(void);
static int mc_board_add(mc_opl_t *mcp);
static int mc_board_del(mc_opl_t *mcp);
static int mc_suspend(mc_opl_t *mcp, uint32_t flag);
static int mc_resume(mc_opl_t *mcp, uint32_t flag);
int opl_mc_suspend(void);
int opl_mc_resume(void);

static void insert_mcp(mc_opl_t *mcp);
static void delete_mcp(mc_opl_t *mcp);

static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr);

static int mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa);

int mc_get_mem_unum(int, uint64_t, char *, int, int *);
int mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr);
int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
int mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp);
int mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
    int buflen, int *lenp);
mc_dimm_info_t *mc_get_dimm_list(mc_opl_t *mcp);
mc_dimm_info_t *mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp);
int mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, int bank,
    uint32_t mf_type, uint32_t d_slot);
static void mc_free_dimm_list(mc_dimm_info_t *d);
static void mc_get_mlist(mc_opl_t *);
static void mc_polling(void);
static int mc_opl_get_physical_board(int);

static void mc_clear_rewrite(mc_opl_t *mcp, int i);
static void mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state);

#ifdef DEBUG
static int mc_ioctl_debug(dev_t, int, intptr_t, int, cred_t *, int *);
void mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz);
void mc_dump_dimm_info(board_dimm_info_t *bd_dimmp);
#endif

#pragma weak opl_get_physical_board
extern int opl_get_physical_board(int);
extern int plat_max_boards(void);

/*
 * Configuration data structures
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};

static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt */
	ddi_getinfo_1to1,		/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev				/* power */
};

/*
 * Driver globals
 */

static enum {
	MODEL_FF1 = 0,
	MODEL_FF2 = 1,
	MODEL_DC = 2
} plat_model = MODEL_DC;	/* The default behaviour is DC */

static struct plat_model_names {
	const char *unit_name;
	const char *mem_name;
} model_names[] = {
	{ "MBU_A", "MEMB" },
	{ "MBU_B", "MEMB" },
	{ "CMU", "" }
};

/*
 * The DIMM Names for DC platform.
 * The index into this table is made up of (bank, dslot),
 * where dslot occupies bits 0-1 and bank occupies bits 2-4.
 */
static char *mc_dc_dimm_unum_table[OPL_MAX_DIMMS] = {
	/* --------CMUnn----------- */
	/* --CS0-----|--CS1------ */
	/* -H-|--L-- | -H- | -L-- */
	"03A", "02A", "03B", "02B",	/* Bank 0 (MAC 0 bank 0) */
	"13A", "12A", "13B", "12B",	/* Bank 1 (MAC 0 bank 1) */
	"23A", "22A", "23B", "22B",	/* Bank 2 (MAC 1 bank 0) */
	"33A", "32A", "33B", "32B",	/* Bank 3 (MAC 1 bank 1) */
	"01A", "00A", "01B", "00B",	/* Bank 4 (MAC 2 bank 0) */
	"11A", "10A", "11B", "10B",	/* Bank 5 (MAC 2 bank 1) */
	"21A", "20A", "21B", "20B",	/* Bank 6 (MAC 3 bank 0) */
	"31A", "30A", "31B", "30B"	/* Bank 7 (MAC 3 bank 1) */
};

/*
 * The DIMM Names for FF1/FF2 platforms.
 * The index into this table is made up of (board, bank, dslot),
 * where dslot occupies bits 0-1, bank occupies bits 2-4 and
 * board occupies bit 5.
 */
static char *mc_ff_dimm_unum_table[2 * OPL_MAX_DIMMS] = {
	/* --------CMU0---------- */
	/* --CS0-----|--CS1------ */
	/* -H-|--L-- | -H- | -L-- */
	"03A", "02A", "03B", "02B",	/* Bank 0 (MAC 0 bank 0) */
	"01A", "00A", "01B", "00B",	/* Bank 1 (MAC 0 bank 1) */
	"13A", "12A", "13B", "12B",	/* Bank 2 (MAC 1 bank 0) */
	"11A", "10A", "11B", "10B",	/* Bank 3 (MAC 1 bank 1) */
	"23A", "22A", "23B", "22B",	/* Bank 4 (MAC 2 bank 0) */
	"21A", "20A", "21B", "20B",	/* Bank 5 (MAC 2 bank 1) */
	"33A", "32A", "33B", "32B",	/* Bank 6 (MAC 3 bank 0) */
	"31A", "30A", "31B", "30B",	/* Bank 7 (MAC 3 bank 1) */
	/* --------CMU1---------- */
	/* --CS0-----|--CS1------ */
	/* -H-|--L-- | -H- | -L-- */
	"43A", "42A", "43B", "42B",	/* Bank 0 (MAC 0 bank 0) */
	"41A", "40A", "41B", "40B",	/* Bank 1 (MAC 0 bank 1) */
	"53A", "52A", "53B", "52B",	/* Bank 2 (MAC 1 bank 0) */
	"51A", "50A", "51B", "50B",	/* Bank 3 (MAC 1 bank 1) */
	"63A", "62A", "63B", "62B",	/* Bank 4 (MAC 2 bank 0) */
	"61A", "60A", "61B", "60B",	/* Bank 5 (MAC 2 bank 1) */
	"73A", "72A", "73B", "72B",	/* Bank 6 (MAC 3 bank 0) */
	"71A", "70A", "71B", "70B"	/* Bank 7 (MAC 3 bank 1) */
};

#define	BD_BK_SLOT_TO_INDEX(bd, bk, s)			\
	(((bd & 0x01) << 5) | ((bk & 0x07) << 2) | (s & 0x03))

#define	INDEX_TO_BANK(i)		(((i) & 0x1C) >> 2)
#define	INDEX_TO_SLOT(i)		((i) & 0x03)

#define	SLOT_TO_CS(slot)		((slot & 0x3) >> 1)
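
/*
 * For example, on FF platforms BD_BK_SLOT_TO_INDEX(1, 2, 3) yields
 * (1 << 5) | (2 << 2) | 3 = 43, and mc_ff_dimm_unum_table[43] is "52B"
 * (CMU1, MAC 1 bank 0, CS1 low slot).  Conversely, INDEX_TO_BANK(43) == 2,
 * INDEX_TO_SLOT(43) == 3 and SLOT_TO_CS(3) == 1.
 */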

/* Isolation unit size is 64 MB */
#define	MC_ISOLATION_BSIZE	(64 * 1024 * 1024)

#define	MC_MAX_SPEEDS	7

typedef struct {
	uint32_t	mc_speeds;
	uint32_t	mc_period;
} mc_scan_speed_t;

#define	MC_CNTL_SPEED_SHIFT	26

/*
 * In mirror mode, we normalize the bank idx to "even" since
 * the HW treats them as one unit w.r.t. programming.
 * This bank index will be the "effective" bank index.
 * All mirrored bank state info on mc_period, mc_speedup_period
 * will be stored in the even bank structure to avoid code duplication.
 */
#define	MIRROR_IDX(bankidx)	(bankidx & ~1)

static mc_scan_speed_t	mc_scan_speeds[MC_MAX_SPEEDS] = {
	{0x6 << MC_CNTL_SPEED_SHIFT, 0},
	{0x5 << MC_CNTL_SPEED_SHIFT, 32},
	{0x4 << MC_CNTL_SPEED_SHIFT, 64},
	{0x3 << MC_CNTL_SPEED_SHIFT, 128},
	{0x2 << MC_CNTL_SPEED_SHIFT, 256},
	{0x1 << MC_CNTL_SPEED_SHIFT, 512},
	{0x0 << MC_CNTL_SPEED_SHIFT, 1024}
};

static uint32_t	mc_max_speed = (0x6 << 26);

int mc_isolation_bsize = MC_ISOLATION_BSIZE;
int mc_patrol_interval_sec = MC_PATROL_INTERVAL_SEC;
int mc_max_scf_retry = 16;
int mc_max_scf_logs = 64;
int mc_max_errlog_processed = BANKNUM_PER_SB * 2;
int mc_scan_period = 12 * 60 * 60;	/* 12 hour period */
int mc_max_rewrite_loop = 100;
int mc_rewrite_delay = 10;
/*
 * It takes SCF about 300 ms to process a request.  We can bail out
 * if it is busy.  It does not pay to wait for it too long.
 */
int mc_max_scf_loop = 2;
int mc_scf_delay = 100;
int mc_pce_dropped = 0;
int mc_poll_priority = MINCLSYSPRI;
int mc_max_rewrite_retry = 6 * 60;


/*
 * Mutex hierarchy in mc-opl
 * If both mcmutex and mc_lock must be held,
 * mcmutex must be acquired first, and then mc_lock.
 */

static kmutex_t mcmutex;
mc_opl_t *mc_instances[OPL_MAX_BOARDS];

static kmutex_t mc_polling_lock;
static kcondvar_t mc_polling_cv;
static kcondvar_t mc_poll_exit_cv;
static int mc_poll_cmd = 0;
static int mc_pollthr_running = 0;
int mc_timeout_period = 0;	/* this is in clock ticks */
void *mc_statep;

#ifdef DEBUG
int oplmc_debug = 0;
#endif

static int mc_debug_show_all = 0;

extern struct mod_ops mod_driverops;

static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"OPL Memory-controller %I%",	/* module name */
	&mc_ops,			/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};

#pragma weak opl_get_mem_unum
#pragma weak opl_get_mem_sid
#pragma weak opl_get_mem_offset
#pragma weak opl_get_mem_addr

extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
extern int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
extern int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
extern int (*opl_get_mem_addr)(char *unum, char *sid, uint64_t offset,
    uint64_t *paddr);


/*
 * pseudo-mc node portid format
 *
 *	[10]  = 0
 *	[9]   = 1
 *	[8]   = LSB_ID[4] = 0
 *	[7:4] = LSB_ID[3:0]
 *	[3:0] = 0
 *
 */

/*
 * These are the module initialization routines.
 */
int
_init(void)
{
	int	error;
	int	plen;
	char	model[20];
	pnode_t	node;


	if ((error = ddi_soft_state_init(&mc_statep,
	    sizeof (mc_opl_t), 1)) != 0)
		return (error);

	if ((error = mc_poll_init()) != 0) {
		ddi_soft_state_fini(&mc_statep);
		return (error);
	}

	mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
	if (&opl_get_mem_unum)
		opl_get_mem_unum = mc_get_mem_unum;
	if (&opl_get_mem_sid)
		opl_get_mem_sid = mc_get_mem_sid;
	if (&opl_get_mem_offset)
		opl_get_mem_offset = mc_get_mem_offset;
	if (&opl_get_mem_addr)
		opl_get_mem_addr = mc_get_mem_addr;

	node = prom_rootnode();
	plen = prom_getproplen(node, "model");

	if (plen > 0 && plen < sizeof (model)) {
		(void) prom_getprop(node, "model", model);
		model[plen] = '\0';
		if (strcmp(model, "FF1") == 0)
			plat_model = MODEL_FF1;
		else if (strcmp(model, "FF2") == 0)
			plat_model = MODEL_FF2;
		else if (strncmp(model, "DC", 2) == 0)
			plat_model = MODEL_DC;
	}

	error = mod_install(&modlinkage);
	if (error != 0) {
		if (&opl_get_mem_unum)
			opl_get_mem_unum = NULL;
		if (&opl_get_mem_sid)
			opl_get_mem_sid = NULL;
		if (&opl_get_mem_offset)
			opl_get_mem_offset = NULL;
		if (&opl_get_mem_addr)
			opl_get_mem_addr = NULL;
		mutex_destroy(&mcmutex);
		mc_poll_fini();
		ddi_soft_state_fini(&mc_statep);
	}
	return (error);
}

int
_fini(void)
{
	int error;

	if ((error = mod_remove(&modlinkage)) != 0)
		return (error);

	if (&opl_get_mem_unum)
		opl_get_mem_unum = NULL;
	if (&opl_get_mem_sid)
		opl_get_mem_sid = NULL;
	if (&opl_get_mem_offset)
		opl_get_mem_offset = NULL;
	if (&opl_get_mem_addr)
		opl_get_mem_addr = NULL;

	mutex_destroy(&mcmutex);
	mc_poll_fini();
	ddi_soft_state_fini(&mc_statep);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
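
/*
 * Polling thread: calls mc_polling() and then sleeps for mc_timeout_period
 * ticks, until mc_poll_fini() asks it to exit by setting MC_POLL_EXIT.
 */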
static void
mc_polling_thread()
{
	mutex_enter(&mc_polling_lock);
	mc_pollthr_running = 1;
	while (!(mc_poll_cmd & MC_POLL_EXIT)) {
		mc_polling();
		cv_timedwait(&mc_polling_cv, &mc_polling_lock,
		    ddi_get_lbolt() + mc_timeout_period);
	}
	mc_pollthr_running = 0;

	/*
	 * Signal if anyone is waiting for this thread to exit.
	 */
	cv_signal(&mc_poll_exit_cv);
	mutex_exit(&mc_polling_lock);
	thread_exit();
	/* NOTREACHED */
}

static int
mc_poll_init()
{
	mutex_init(&mc_polling_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&mc_polling_cv, NULL, CV_DRIVER, NULL);
	cv_init(&mc_poll_exit_cv, NULL, CV_DRIVER, NULL);
	return (0);
}

static void
mc_poll_fini()
{
	mutex_enter(&mc_polling_lock);
	if (mc_pollthr_running) {
		mc_poll_cmd = MC_POLL_EXIT;
		cv_signal(&mc_polling_cv);
		while (mc_pollthr_running) {
			cv_wait(&mc_poll_exit_cv, &mc_polling_lock);
		}
	}
	mutex_exit(&mc_polling_lock);
	mutex_destroy(&mc_polling_lock);
	cv_destroy(&mc_polling_cv);
	cv_destroy(&mc_poll_exit_cv);
}

static int
mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	mc_opl_t *mcp;
	int instance;
	int rv;

	/* get the instance of this devi */
	instance = ddi_get_instance(devi);

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		mcp = ddi_get_soft_state(mc_statep, instance);
		rv = mc_resume(mcp, MC_DRIVER_SUSPENDED);
		return (rv);
	default:
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
		goto bad;
	}

	if (mc_timeout_period == 0) {
		mc_patrol_interval_sec = (int)ddi_getprop(DDI_DEV_T_ANY, devi,
		    DDI_PROP_DONTPASS, "mc-timeout-interval-sec",
		    mc_patrol_interval_sec);
		mc_timeout_period = drv_usectohz(1000000 *
		    mc_patrol_interval_sec / OPL_MAX_BOARDS);
	}

	/* set information in mc state */
	mcp->mc_dip = devi;

	if (mc_board_add(mcp))
		goto bad;

	insert_mcp(mcp);

	/*
	 * Start the polling thread if it is not running already.
	 */
	mutex_enter(&mc_polling_lock);
	if (!mc_pollthr_running) {
		(void) thread_create(NULL, 0, (void (*)())mc_polling_thread,
		    NULL, 0, &p0, TS_RUN, mc_poll_priority);
	}
	mutex_exit(&mc_polling_lock);
	ddi_report_dev(devi);

	return (DDI_SUCCESS);

bad:
	ddi_soft_state_free(mc_statep, instance);
	return (DDI_FAILURE);
}

/* ARGSUSED */
static int
mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	int rv;
	int instance;
	mc_opl_t *mcp;

	/* get the instance of this devi */
	instance = ddi_get_instance(devi);
	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_SUSPEND:
		rv = mc_suspend(mcp, MC_DRIVER_SUSPENDED);
		return (rv);
	case DDI_DETACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	delete_mcp(mcp);
	if (mc_board_del(mcp) != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/* free up the soft state */
	ddi_soft_state_free(mc_statep, instance);

	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	return (0);
}

/* ARGSUSED */
static int
mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
{
	return (0);
}

/* ARGSUSED */
static int
mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
#ifdef DEBUG
	return (mc_ioctl_debug(dev, cmd, arg, mode, credp, rvalp));
#else
	return (ENXIO);
#endif
}

/*
 * PA validity check:
 * This function returns 1 if the PA is a valid PA
 * in the running Solaris instance, i.e. in physinstall.
 * Otherwise, it returns 0.
 */

/* ARGSUSED */
static int
pa_is_valid(mc_opl_t *mcp, uint64_t addr)
{
	if (mcp->mlist == NULL)
		mc_get_mlist(mcp);

	if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) {
		return (1);
	}
	return (0);
}

/*
 * mac-pa translation routines.
 *
 * Input:  mc driver state, (LSB#, Bank#, DIMM address)
 * Output: physical address
 *
 * Valid   - return value: 0
 * Invalid - return value: -1
 */
static int
mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa)
{
	int i;
	uint64_t pa_offset = 0;
	int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1;
	int bank = maddr->ma_bank;
	mc_addr_t maddr1;
	int bank0, bank1;

	MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
	    maddr->ma_dimm_addr);

	/* loc validity check */
	ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd);
	ASSERT(bank >= 0 && OPL_BANK_MAX > bank);

	/* Do translation */
	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
		int pa_bit = 0;
		int mc_bit = mcp->mc_trans_table[cs][i];
		if (mc_bit < MC_ADDRESS_BITS) {
			pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1;
		} else if (mc_bit == MP_NONE) {
			pa_bit = 0;
		} else if (mc_bit == MP_BANK_0) {
			pa_bit = bank & 1;
		} else if (mc_bit == MP_BANK_1) {
			pa_bit = (bank >> 1) & 1;
		} else if (mc_bit == MP_BANK_2) {
			pa_bit = (bank >> 2) & 1;
		}
		pa_offset |= ((uint64_t)pa_bit) << i;
	}
	*pa = mcp->mc_start_address + pa_offset;
	MC_LOG("pa = %lx\n", *pa);

	if (pa_to_maddr(mcp, *pa, &maddr1) == -1) {
		cmn_err(CE_WARN, "mcaddr_to_pa: /LSB%d/B%d/%x failed to "
		    "convert PA %lx\n", maddr->ma_bd, bank,
		    maddr->ma_dimm_addr, *pa);
		return (-1);
	}

	/*
	 * In mirror mode, PA is always translated to the even bank.
	 */
	if (IS_MIRROR(mcp, maddr->ma_bank)) {
		bank0 = maddr->ma_bank & ~(1);
		bank1 = maddr1.ma_bank & ~(1);
	} else {
		bank0 = maddr->ma_bank;
		bank1 = maddr1.ma_bank;
	}
	/*
	 * There is no need to check ma_bd because it is generated from
	 * mcp.  They are the same.
	 */
	if ((bank0 == bank1) && (maddr->ma_dimm_addr ==
	    maddr1.ma_dimm_addr)) {
		return (0);
	} else {
		cmn_err(CE_WARN, "Translation error source /LSB%d/B%d/%x, "
		    "PA %lx, target /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
		    maddr->ma_dimm_addr, *pa, maddr1.ma_bd, maddr1.ma_bank,
		    maddr1.ma_dimm_addr);
		return (-1);
	}
}

/*
 * PA to CS (used by pa_to_maddr).
 */
static int
pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset)
{
	int i;
	int cs = 1;

	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
		/* MAC address bit<29> is arranged on the same PA bit */
		/* in both tables, so we may use either table. */
		if (mcp->mc_trans_table[0][i] == CS_SHIFT) {
			cs = (pa_offset >> i) & 1;
			break;
		}
	}
	return (cs);
}

/*
 * PA to DIMM (used by pa_to_maddr).
 */
/* ARGSUSED */
static uint32_t
pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset)
{
	int i;
	int cs = pa_to_cs(mcp, pa_offset);
	uint32_t dimm_addr = 0;

	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
		int pa_bit_value = (pa_offset >> i) & 1;
		int mc_bit = mcp->mc_trans_table[cs][i];
		if (mc_bit < MC_ADDRESS_BITS) {
			dimm_addr |= pa_bit_value << mc_bit;
		}
	}
	dimm_addr |= cs << CS_SHIFT;
	return (dimm_addr);
}

/*
 * PA to Bank (used by pa_to_maddr).
 */
static int
pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset)
{
	int i;
	int cs = pa_to_cs(mcp, pa_offset);
	int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT];


	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
		int pa_bit_value = (pa_offset >> i) & 1;
		int mc_bit = mcp->mc_trans_table[cs][i];
		switch (mc_bit) {
		case MP_BANK_0:
			bankno |= pa_bit_value;
			break;
		case MP_BANK_1:
			bankno |= pa_bit_value << 1;
			break;
		case MP_BANK_2:
			bankno |= pa_bit_value << 2;
			break;
		}
	}

	return (bankno);
}

/*
 * PA to MAC address translation
 *
 * Input:  MAC driver state, physical address
 * Output: LSB#, Bank id, mac address
 *
 * Valid   - return value: 0
 * Invalid - return value: -1
 */

int
pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr)
{
	uint64_t pa_offset;

	if (!mc_rangecheck_pa(mcp, pa))
		return (-1);

	/* Do translation */
	pa_offset = pa - mcp->mc_start_address;

	maddr->ma_bd = mcp->mc_board_num;
	maddr->ma_phys_bd = mcp->mc_phys_board_num;
	maddr->ma_bank = pa_to_bank(mcp, pa_offset);
	maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset);
	MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n", pa_offset, maddr->ma_bd,
	    maddr->ma_bank, maddr->ma_dimm_addr);
	return (0);
}

/*
 * UNUM format for DC is "/CMUnn/MEMxyZ", where
 *	nn = 00..03 for DC1 and 00..07 for DC2 and 00..15 for DC3.
 *	x = MAC 0..3
 *	y = 0..3 (slot info).
 *	Z = 'A' or 'B'
 *
 * UNUM format for FF1 is "/MBU_A/MEMBx/MEMyZ", where
 *	x = 0..3 (MEMB number)
 *	y = 0..3 (slot info).
 *	Z = 'A' or 'B'
 *
 * UNUM format for FF2 is "/MBU_B/MEMBx/MEMyZ", where
 *	x = 0..7 (MEMB number)
 *	y = 0..3 (slot info).
 *	Z = 'A' or 'B'
 */
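/*
 * For example, with the DC table above, a permanent CE on system board 2,
 * bank 1, slot 2 is named "/CMU02/MEM13B", while a UE on the same bank and
 * CS reports the DIMM pair "/CMU02/MEM13B MEM12B".
 */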
int
mc_set_mem_unum(char *buf, int buflen, int sb, int bank,
    uint32_t mf_type, uint32_t d_slot)
{
	char *dimmnm;
	char memb_num;
	int cs;
	int i;
	int j;

	cs = SLOT_TO_CS(d_slot);

	if (plat_model == MODEL_DC) {
		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
		    mf_type == FLT_TYPE_PERMANENT_CE) {
			i = BD_BK_SLOT_TO_INDEX(0, bank, d_slot);
			dimmnm = mc_dc_dimm_unum_table[i];
			snprintf(buf, buflen, "/%s%02d/MEM%s",
			    model_names[plat_model].unit_name, sb, dimmnm);
		} else {
			i = BD_BK_SLOT_TO_INDEX(0, bank, 0);
			j = (cs == 0) ? i : i + 2;
			snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s",
			    model_names[plat_model].unit_name, sb,
			    mc_dc_dimm_unum_table[j],
			    mc_dc_dimm_unum_table[j + 1]);
		}
	} else {
		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
		    mf_type == FLT_TYPE_PERMANENT_CE) {
			i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
			dimmnm = mc_ff_dimm_unum_table[i];
			memb_num = dimmnm[0];
			snprintf(buf, buflen, "/%s/%s%c/MEM%s",
			    model_names[plat_model].unit_name,
			    model_names[plat_model].mem_name,
			    memb_num, &dimmnm[1]);
		} else {
			i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
			j = (cs == 0) ? i : i + 2;
			memb_num = mc_ff_dimm_unum_table[i][0];
			snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s",
			    model_names[plat_model].unit_name,
			    model_names[plat_model].mem_name, memb_num,
			    &mc_ff_dimm_unum_table[j][1],
			    &mc_ff_dimm_unum_table[j + 1][1]);
		}
	}
	return (0);
}

static void
mc_ereport_post(mc_aflt_t *mc_aflt)
{
	char buf[FM_MAX_CLASS];
	char device_path[MAXPATHLEN];
	char sid[MAXPATHLEN];
	nv_alloc_t *nva = NULL;
	nvlist_t *ereport, *detector, *resource;
	errorq_elem_t *eqep;
	int nflts;
	mc_flt_stat_t *flt_stat;
	int i, n;
	int blen = MAXPATHLEN;
	char *p, *s = NULL;
	uint32_t values[2], synd[2], dslot[2];
	uint64_t offset = (uint64_t)-1;
	int ret = -1;

	if (panicstr) {
		eqep = errorq_reserve(ereport_errorq);
		if (eqep == NULL)
			return;
		ereport = errorq_elem_nvl(ereport_errorq, eqep);
		nva = errorq_elem_nva(ereport_errorq, eqep);
	} else {
		ereport = fm_nvlist_create(nva);
	}

	/*
	 * Create the scheme "dev" FMRI.
	 */
	detector = fm_nvlist_create(nva);
	resource = fm_nvlist_create(nva);

	nflts = mc_aflt->mflt_nflts;

	ASSERT(nflts >= 1 && nflts <= 2);

	flt_stat = mc_aflt->mflt_stat[0];
	(void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path);
	(void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL,
	    device_path, NULL);

	/*
	 * Encode all the common data into the ereport.
	 */
	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s", MC_OPL_ERROR_CLASS,
	    mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS : MC_OPL_MI_SUBCLASS,
	    mc_aflt->mflt_erpt_class);

	MC_LOG("mc_ereport_post: ereport %s\n", buf);


	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
	    fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1), detector, NULL);

	/*
	 * Set payload.
	 */
	fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32,
	    flt_stat->mf_flt_maddr.ma_bd, NULL);

	fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64,
	    flt_stat->mf_flt_paddr, NULL);

	if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE ||
	    flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
		fm_payload_set(ereport, MC_OPL_FLT_TYPE, DATA_TYPE_UINT8,
		    ECC_STICKY, NULL);
	}

	for (i = 0; i < nflts; i++)
		values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank;

	fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY, nflts,
	    values, NULL);

	for (i = 0; i < nflts; i++)
		values[i] = mc_aflt->mflt_stat[i]->mf_cntl;

	fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY, nflts,
	    values, NULL);

	for (i = 0; i < nflts; i++)
		values[i] = mc_aflt->mflt_stat[i]->mf_err_add;

	/* offset is set only for PCE and ICE */
	if (mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_INTERMITTENT_CE ||
	    mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_PERMANENT_CE) {
		offset = values[0];

	}
	fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY, nflts,
	    values, NULL);

	for (i = 0; i < nflts; i++)
		values[i] = mc_aflt->mflt_stat[i]->mf_err_log;

	fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY, nflts,
	    values, NULL);

	for (i = 0; i < nflts; i++) {
		flt_stat = mc_aflt->mflt_stat[i];
		if (flt_stat->mf_errlog_valid) {
			synd[i] = flt_stat->mf_synd;
			dslot[i] = flt_stat->mf_dimm_slot;
			values[i] = flt_stat->mf_dram_place;
		} else {
			synd[i] = 0;
			dslot[i] = 0;
			values[i] = 0;
		}
	}

	fm_payload_set(ereport, MC_OPL_ERR_SYND, DATA_TYPE_UINT32_ARRAY, nflts,
	    synd, NULL);

	fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT, DATA_TYPE_UINT32_ARRAY,
	    nflts, dslot, NULL);

	fm_payload_set(ereport, MC_OPL_ERR_DRAM, DATA_TYPE_UINT32_ARRAY, nflts,
	    values, NULL);

	device_path[0] = 0;
	p = &device_path[0];
	sid[0] = 0;
	s = &sid[0];
	ret = 0;

	for (i = 0; i < nflts; i++) {
		int bank;

		flt_stat = mc_aflt->mflt_stat[i];
		bank = flt_stat->mf_flt_maddr.ma_bank;
		ret = mc_set_mem_unum(p + strlen(p), blen,
		    flt_stat->mf_flt_maddr.ma_phys_bd, bank, flt_stat->mf_type,
		    flt_stat->mf_dimm_slot);

		if (ret != 0) {
			cmn_err(CE_WARN,
			    "mc_ereport_post: Failed to determine the unum "
			    "for board=%d bank=%d type=0x%x slot=0x%x",
			    flt_stat->mf_flt_maddr.ma_bd, bank,
			    flt_stat->mf_type, flt_stat->mf_dimm_slot);
			continue;
		}
		n = strlen(device_path);
		blen = MAXPATHLEN - n;
		p = &device_path[n];
		if (i < (nflts - 1)) {
			snprintf(p, blen, " ");
			blen--;
			p++;
		}

		if (ret == 0) {
			ret = mc_set_mem_sid(mc_aflt->mflt_mcp, s + strlen(s),
			    blen, flt_stat->mf_flt_maddr.ma_phys_bd, bank,
			    flt_stat->mf_type, flt_stat->mf_dimm_slot);

		}
	}
	(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
	    device_path, (ret == 0) ? sid : NULL, (ret == 0) ? offset :
	    (uint64_t)-1);

	fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST, resource,
	    NULL);

	if (panicstr) {
		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
	} else {
		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
		fm_nvlist_destroy(ereport, FM_NVA_FREE);
		fm_nvlist_destroy(detector, FM_NVA_FREE);
		fm_nvlist_destroy(resource, FM_NVA_FREE);
	}
}


static void
mc_err_drain(mc_aflt_t *mc_aflt)
{
	int rv;
	uint64_t pa = (uint64_t)(-1);
	int i;

	MC_LOG("mc_err_drain: %s\n", mc_aflt->mflt_erpt_class);
	/*
	 * We come here only when we have:
	 * In mirror mode: MUE, SUE
	 * In normal mode: UE, Permanent CE, Intermittent CE
	 */
	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
		rv = mcaddr_to_pa(mc_aflt->mflt_mcp,
		    &(mc_aflt->mflt_stat[i]->mf_flt_maddr), &pa);

		/* Ensure the pa is valid (not in isolated memory block) */
		if (rv == 0 && pa_is_valid(mc_aflt->mflt_mcp, pa))
			mc_aflt->mflt_stat[i]->mf_flt_paddr = pa;
		else
			mc_aflt->mflt_stat[i]->mf_flt_paddr = (uint64_t)-1;
	}

	MC_LOG("mc_err_drain:pa = %lx\n", pa);

	switch (page_retire_check(pa, NULL)) {
	case 0:
	case EAGAIN:
		MC_LOG("Page retired or pending\n");
		return;
	case EIO:
		/*
		 * Do page retirement except for the PCE and ICE cases.
		 * Those are taken care of by the OPL DE.
		 */
		if (mc_aflt->mflt_stat[0]->mf_type !=
		    FLT_TYPE_INTERMITTENT_CE &&
		    mc_aflt->mflt_stat[0]->mf_type != FLT_TYPE_PERMANENT_CE) {
			MC_LOG("offline page at pa %lx error %x\n", pa,
			    mc_aflt->mflt_pr);
			(void) page_retire(pa, mc_aflt->mflt_pr);
		}
		break;
	case EINVAL:
	default:
		/*
		 * Some memory does not have a page structure, so
		 * we keep going in the EINVAL case.
		 */
		break;
	}

	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
		mc_aflt_t mc_aflt0;
		if (mc_aflt->mflt_stat[i]->mf_flt_paddr != (uint64_t)-1) {
			mc_aflt0 = *mc_aflt;
			mc_aflt0.mflt_nflts = 1;
			mc_aflt0.mflt_stat[0] = mc_aflt->mflt_stat[i];
			mc_ereport_post(&mc_aflt0);
		}
	}
}

/*
 * The restart address is actually defined in units of PA[37:6];
 * the mac patrol will convert that to a dimm offset.  If the
 * address is not in the bank, it will continue to search for
 * the next PA that is within the bank.
 *
 * Also the mac patrol scans the dimms based on PA, not
 * dimm offset.
 */
static int
restart_patrol(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr_info)
{
	uint64_t pa;
	int rv;

	if (MC_REWRITE_MODE(mcp, bank)) {
		return (0);
	}
	if (rsaddr_info == NULL || (rsaddr_info->mi_valid == 0)) {
		MAC_PTRL_START(mcp, bank);
		return (0);
	}

	rv = mcaddr_to_pa(mcp, &rsaddr_info->mi_restartaddr, &pa);
	if (rv != 0) {
		MC_LOG("cannot convert mcaddr to pa. use auto restart\n");
		MAC_PTRL_START(mcp, bank);
		return (0);
	}

	if (!mc_rangecheck_pa(mcp, pa)) {
		/* pa is not on this board, just retry */
		cmn_err(CE_WARN, "restart_patrol: invalid address %lx "
		    "on board %d\n", pa, mcp->mc_board_num);
		MAC_PTRL_START(mcp, bank);
		return (0);
	}

	MC_LOG("restart_patrol: pa = %lx\n", pa);

	if (!rsaddr_info->mi_injectrestart) {
		/*
		 * For a non-error-injection restart we need to
		 * determine if the current restart pa/page is
		 * a "good" page.  A "good" page is a page that
		 * has not been page retired.  If the current
		 * page that contains the pa is "good", we will
		 * do a HW auto restart and let HW patrol continue
		 * where it last stopped.  Most desired scenario.
		 *
		 * If the current page is not "good", we will advance
		 * to the next page to find the next "good" page and
		 * restart the patrol from there.
		 */
		int wrapcount = 0;
		uint64_t origpa = pa;
		while (wrapcount < 2) {
			if (!pa_is_valid(mcp, pa)) {
				/*
				 * Not in physinstall - advance to the
				 * next memory isolation blocksize
				 */
				MC_LOG("Invalid PA\n");
				pa = roundup(pa + 1, mc_isolation_bsize);
			} else {
				int rv;
				if ((rv = page_retire_check(pa, NULL)) != 0 &&
				    rv != EAGAIN) {
					/*
					 * The page is "good" (not retired),
					 * we will use automatic HW restart
					 * algorithm if this is the original
					 * current starting page.
					 */
					if (pa == origpa) {
						MC_LOG("Page has no error. "
						    "Auto restart\n");
						MAC_PTRL_START(mcp, bank);
						return (0);
					} else {
						/*
						 * found a subsequent good page
						 */
						break;
					}
				}

				/*
				 * Skip to the next page
				 */
				pa = roundup(pa + 1, PAGESIZE);
				MC_LOG("Skipping bad page to %lx\n", pa);
			}

			/* Check to see if we hit the end of the memory range */
			if (pa >= (mcp->mc_start_address + mcp->mc_size)) {
				MC_LOG("Wrap around\n");
				pa = mcp->mc_start_address;
				wrapcount++;
			}
		}

		if (wrapcount > 1) {
			MC_LOG("Failed to find a good page. Just restart\n");
			MAC_PTRL_START(mcp, bank);
			return (0);
		}
	}

	/*
	 * We reached here either:
	 * 1. We are doing an error injection restart that specifies
	 *    the exact pa/page to restart.  OR
	 * 2. We found a subsequent good page different from the
	 *    original restart pa/page.
	 * Restart MAC patrol: PA[37:6]
	 */
	MC_LOG("restart at pa = %lx\n", pa);
	ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa));
	MAC_PTRL_START_ADD(mcp, bank);

	return (0);
}

static void
mc_retry_info_put(mc_retry_info_t **q, mc_retry_info_t *p)
{
	ASSERT(p != NULL);
	p->ri_next = *q;
	*q = p;
}

static mc_retry_info_t *
mc_retry_info_get(mc_retry_info_t **q)
{
	mc_retry_info_t *p;

	if ((p = *q) != NULL) {
		*q = p->ri_next;
		return (p);
	} else {
		return (NULL);
	}
}

/*
 * Rewriting is used for two purposes.
 *  - to correct the error in memory.
 *  - to determine whether the error is permanent or intermittent.
 * It's done by writing the address in MAC_BANKm_REWRITE_ADD
 * and issuing the REW_REQ command in MAC_BANKm_PTRL_CNTL.  After that,
 * REW_END (and REW_CE/REW_UE if some error is detected) is set when the
 * rewrite operation is done.  See 4.7.3 and 4.7.11 in Columbus2 PRM.
 *
 * Note that the rewrite operation doesn't change RAW_UE to Marked UE.
 * Therefore, we use it only in the CE case.
 */

static uint32_t
do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr, int retrying)
{
	uint32_t cntl;
	int count = 0;
	int max_count;
	int retry_state;

	if (retrying)
		max_count = 1;
	else
		max_count = mc_max_rewrite_loop;

	retry_state = RETRY_STATE_PENDING;

	if (!retrying && MC_REWRITE_MODE(mcp, bank)) {
		goto timeout;
	}

	retry_state = RETRY_STATE_ACTIVE;

	/* first wait to make sure PTRL_STATUS is 0 */
	while (count++ < max_count) {
		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
		if (!(cntl & MAC_CNTL_PTRL_STATUS)) {
			count = 0;
			break;
		}
		drv_usecwait(mc_rewrite_delay);
	}
	if (count >= max_count)
		goto timeout;

	count = 0;

	ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr);
	MAC_REW_REQ(mcp, bank);

	retry_state = RETRY_STATE_REWRITE;

	do {
		if (count++ > max_count) {
			goto timeout;
		} else {
			drv_usecwait(mc_rewrite_delay);
		}
		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
		/*
		 * If there are other MEMORY or PCI activities, this
		 * will be BUSY, else it should be set immediately
		 */
	} while (!(cntl & MAC_CNTL_REW_END));

	MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS);
	return (cntl);
timeout:
	mc_set_rewrite(mcp, bank, dimm_addr, retry_state);

	return (0);
}

void
mc_clear_rewrite(mc_opl_t *mcp, int bank)
{
	struct mc_bank *bankp;
	mc_retry_info_t *retry;
	uint32_t rew_addr;

	bankp = &(mcp->mc_bank[bank]);
	retry = bankp->mcb_active;
	bankp->mcb_active = NULL;
	mc_retry_info_put(&bankp->mcb_retry_freelist, retry);

again:
	bankp->mcb_rewrite_count = 0;

	while (retry = mc_retry_info_get(&bankp->mcb_retry_pending)) {
		rew_addr = retry->ri_addr;
		mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
		if (do_rewrite(mcp, bank, rew_addr, 1) == 0)
			break;
	}

	/* we break out if no more pending rewrite or we got timeout again */

	if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
		if (!IS_MIRROR(mcp, bank)) {
			MC_CLEAR_REWRITE_MODE(mcp, bank);
		} else {
			int mbank = bank ^ 1;
			bankp = &(mcp->mc_bank[mbank]);
			if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
				MC_CLEAR_REWRITE_MODE(mcp, bank);
				MC_CLEAR_REWRITE_MODE(mcp, mbank);
			} else {
				bank = mbank;
				goto again;
			}
		}
	}
}

void
mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state)
{
	mc_retry_info_t *retry;
	struct mc_bank *bankp;

	bankp = &mcp->mc_bank[bank];

	retry = mc_retry_info_get(&bankp->mcb_retry_freelist);

	ASSERT(retry != NULL);

	retry->ri_addr = addr;
	retry->ri_state = state;

	MC_SET_REWRITE_MODE(mcp, bank);

	if ((state > RETRY_STATE_PENDING)) {
		ASSERT(bankp->mcb_active == NULL);
		bankp->mcb_active = retry;
	} else {
		mc_retry_info_put(&bankp->mcb_retry_pending, retry);
	}

	if (IS_MIRROR(mcp, bank)) {
		int mbank = bank ^ 1;
		MC_SET_REWRITE_MODE(mcp, mbank);
	}
}

void
mc_process_scf_log(mc_opl_t *mcp)
{
	int count;
	int n = 0;
	scf_log_t *p;
	int bank;
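
	/*
	 * For each bank, push up to mc_max_errlog_processed queued
	 * permanent-CE logs (in total) to the SCF through the static
	 * error registers.  If the SCF stays busy, leave the rest queued
	 * and retry on a later poll, eventually dropping a request that
	 * has been retried more than mc_max_scf_retry times.
	 */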

	for (bank = 0; bank < BANKNUM_PER_SB; bank++) {
		while ((p = mcp->mc_scf_log[bank]) != NULL &&
		    (n < mc_max_errlog_processed)) {
			ASSERT(bank == p->sl_bank);
			count = 0;
			while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank))
			    & MAC_STATIC_ERR_VLD)) {
				if (count++ >= (mc_max_scf_loop)) {
					break;
				}
				drv_usecwait(mc_scf_delay);
			}

			if (count < mc_max_scf_loop) {
				ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank),
				    p->sl_err_log);

				ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank),
				    p->sl_err_add|MAC_STATIC_ERR_VLD);
				mcp->mc_scf_retry[bank] = 0;
			} else {
				/*
				 * if we try too many times, just drop the req
				 */
				if (mcp->mc_scf_retry[bank]++ <=
				    mc_max_scf_retry) {
					return;
				} else {
					if ((++mc_pce_dropped & 0xff) == 0) {
						cmn_err(CE_WARN, "Cannot "
						    "report Permanent CE to "
						    "SCF\n");
					}
				}
			}
			n++;
			mcp->mc_scf_log[bank] = p->sl_next;
			mcp->mc_scf_total[bank]--;
			ASSERT(mcp->mc_scf_total[bank] >= 0);
			kmem_free(p, sizeof (scf_log_t));
		}
	}
}

void
mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank)
{
	scf_log_t *p;

	if (mcp->mc_scf_total[bank] >= mc_max_scf_logs) {
		if ((++mc_pce_dropped & 0xff) == 0) {
			cmn_err(CE_WARN, "Too many Permanent CE requests.\n");
		}
		return;
	}
	p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP);
	p->sl_next = 0;
	p->sl_err_add = flt_stat->mf_err_add;
	p->sl_err_log = flt_stat->mf_err_log;
	p->sl_bank = bank;

	if (mcp->mc_scf_log[bank] == NULL) {
		/*
		 * we rely on mc_scf_log to detect a NULL queue.
		 * mc_scf_log_tail is irrelevant in such a case.
		 */
		mcp->mc_scf_log_tail[bank] = mcp->mc_scf_log[bank] = p;
	} else {
		mcp->mc_scf_log_tail[bank]->sl_next = p;
		mcp->mc_scf_log_tail[bank] = p;
	}
	mcp->mc_scf_total[bank]++;
}

/*
 * This routine determines what kind of CE happened, intermittent
 * or permanent, as follows.  (See 4.7.3 in Columbus2 PRM.)
 * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register.
 * - If CE is still detected on the same address even after doing
 *   the rewrite operation twice, it is determined as a permanent error.
 * - If the error is not detected anymore, it is determined as an
 *   intermittent error.
 * - If UE is detected due to the rewrite operation, it should be treated
 *   as UE.
 */

/* ARGSUSED */
static void
mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error)
{
	uint32_t cntl;
	int i;

	flt_stat->mf_type = FLT_TYPE_PERMANENT_CE;
	/*
	 * The first rewrite request reads and corrects the error data
	 * and writes it back to the DIMM.  The 2nd rewrite request must be
	 * issued after REW_CE/UE/END is 0.  When the 2nd request is
	 * completed, if REW_CE = 1, then it is a permanent CE.
	 */
	for (i = 0; i < 2; i++) {
		cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add, 0);

		if (cntl == 0) {
			/* timeout case */
			return;
		}
		/*
		 * If the error becomes UE or CMPE
		 * we return to the caller immediately.
		 */
		if (cntl & MAC_CNTL_REW_UE) {
			if (ptrl_error)
				flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE;
			else
				flt_stat->mf_cntl |= MAC_CNTL_MI_UE;
			flt_stat->mf_type = FLT_TYPE_UE;
			return;
		}
		if (cntl & MAC_CNTL_REW_CMPE) {
			if (ptrl_error)
				flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE;
			else
				flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE;
			flt_stat->mf_type = FLT_TYPE_CMPE;
			return;
		}
	}
	if (!(cntl & MAC_CNTL_REW_CE)) {
		flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE;
	}

	if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
		/* report PERMANENT_CE to SP via SCF */
		if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) {
			mc_queue_scf_log(mcp, flt_stat, bank);
		}
	}
}

#define	IS_CMPE(cntl, f)	((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE :\
	MAC_CNTL_MI_CMPE))
#define	IS_UE(cntl, f)	((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE))
#define	IS_CE(cntl, f)	((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE))
#define	IS_OK(cntl, f)	(!((cntl) & ((f) ? MAC_CNTL_PTRL_ERRS : \
	MAC_CNTL_MI_ERRS)))


static int
IS_CE_ONLY(uint32_t cntl, int ptrl_error)
{
	if (ptrl_error) {
		return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE);
	} else {
		return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE);
	}
}

void
mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value)
{
	int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank;

	if (mcp->mc_speedup_period[ebank] > 0)
		value |= mc_max_speed;
	else
		value |= mcp->mc_speed;
	ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value);
}

static void
mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
{
	flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
	    MAC_CNTL_PTRL_ERRS;
	flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank));
	flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank));
	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
	flt_stat->mf_flt_maddr.ma_bank = bank;
	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
}

static void
mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
{
	uint32_t status, old_status;

	status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & MAC_CNTL_MI_ERRS;
	old_status = 0;

	/* we keep reading until the status is stable */
	while (old_status != status) {
		old_status = status;
		flt_stat->mf_err_add = LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank));
		flt_stat->mf_err_log = LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank));
		status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
		    MAC_CNTL_MI_ERRS;
		if (status == old_status) {
			break;
		}
	}

	flt_stat->mf_cntl = status;
	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
	flt_stat->mf_flt_maddr.ma_bank = bank;
	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
}


/*
 * Error philosophy for mirror mode:
 *
 * PTRL (The error addresses for both banks are the same, since ptrl stops
 * if it detects an error.)
 *	- Compare error	log CMPE.
 *
 *	- UE-UE		Report MUE.  No rewrite.
 *
 *	- UE-*		UE-(CE/OK).  Rewrite to scrub UE.  Report SUE.
 *
 *	- CE-*		CE-(CE/OK).  Scrub to determine if CE is permanent.
 *			If CE is permanent, inform SCF.  Once for each
 *			Dimm.  If CE becomes UE or CMPE, go back to above.
 *
 *
 * MI (The error addresses for each bank are the same or different.)
 *	- Compare error	If addresses are the same, it is just a CMPE,
 *			so log CMPE.  If addresses are different (this
 *			could happen as a result of scrubbing), report
 *			each separately.  Only report error info on
 *			each side.
 *
 *	- UE-UE		Addresses are the same.  Report MUE.
 *			Addresses are different.  Report SUE on each bank.
 *			Rewrite to clear UE.
 *
 *	- UE-*		UE-(CE/OK)
 *			Rewrite to clear UE.  Report SUE for the bank.
 *
 *	- CE-*		CE-(CE/OK).  Scrub to determine if CE is permanent.
 *			If CE becomes UE or CMPE, go back to above.
 *
 */

static int
mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat)
{
	int ptrl_error = mc_aflt->mflt_is_ptrl;
	int i;
	int rv = 0;
	int bank;
	int rewrite_timeout = 0;

	MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n",
	    flt_stat[0].mf_cntl, flt_stat[1].mf_cntl);

	if (ptrl_error) {
		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
		    MAC_CNTL_PTRL_ERRS) == 0)
			return (0);
	} else {
		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
		    MAC_CNTL_MI_ERRS) == 0)
			return (0);
	}

	/*
	 * First we take care of the case of CE
	 * because they can become UE or CMPE
	 */
	for (i = 0; i < 2; i++) {
		if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) {
			bank = flt_stat[i].mf_flt_maddr.ma_bank;
			MC_LOG("CE detected on bank %d\n", bank);
			mc_scrub_ce(mcp, bank, &flt_stat[i], ptrl_error);
			if (MC_REWRITE_ACTIVE(mcp, bank)) {
				rewrite_timeout = 1;
			}
			rv = 1;
		}
	}

	if (rewrite_timeout)
		return (0);

	/* The above scrubbing can turn CE into UE or CMPE */

	/*
	 * Now we distinguish two cases: same address or not
	 * the same address.  It might seem more intuitive to
	 * distinguish PTRL vs. MI errors, but it is more
	 * complicated that way.
	 */

	if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) {

		if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) ||
		    IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) {
			flt_stat[0].mf_type = FLT_TYPE_CMPE;
			flt_stat[1].mf_type = FLT_TYPE_CMPE;
			mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
			mc_aflt->mflt_nflts = 2;
			mc_aflt->mflt_stat[0] = &flt_stat[0];
			mc_aflt->mflt_stat[1] = &flt_stat[1];
			mc_aflt->mflt_pr = PR_UE;
			/*
			 * A compare error is the result of a MAC internal
			 * error, so simply log it instead of publishing an
			 * ereport.  SCF diagnoses all the MAC internal and
			 * its interface errors.
			 * mc_err_drain(mc_aflt);
			 */
			MC_LOG("cmpe error detected\n");
			return (1);
		}

		if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) &&
		    IS_UE(flt_stat[1].mf_cntl, ptrl_error)) {
			/* Both sides are UEs */

			MAC_SET_ERRLOG_INFO(&flt_stat[0]);
			MAC_SET_ERRLOG_INFO(&flt_stat[1]);
			MC_LOG("MUE detected\n");
			flt_stat[0].mf_type = FLT_TYPE_MUE;
			flt_stat[1].mf_type = FLT_TYPE_MUE;
			mc_aflt->mflt_erpt_class = MC_OPL_MUE;
			mc_aflt->mflt_nflts = 2;
			mc_aflt->mflt_stat[0] = &flt_stat[0];
			mc_aflt->mflt_stat[1] = &flt_stat[1];
			mc_aflt->mflt_pr = PR_UE;
			mc_err_drain(mc_aflt);
			return (1);
		}

		/* Now the only case is UE/CE, UE/OK, or don't care */
		for (i = 0; i < 2; i++) {
			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {

				/* rewrite can clear the one side UE error */

				if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) {
					(void) do_rewrite(mcp,
					    flt_stat[i].mf_flt_maddr.ma_bank,
					    flt_stat[i].mf_flt_maddr.ma_dimm_addr, 0);
				}
				flt_stat[i].mf_type = FLT_TYPE_UE;
				MAC_SET_ERRLOG_INFO(&flt_stat[i]);
				mc_aflt->mflt_erpt_class = MC_OPL_SUE;
				mc_aflt->mflt_stat[0] = &flt_stat[i];
				mc_aflt->mflt_nflts = 1;
				mc_aflt->mflt_pr = PR_MCE;
				mc_err_drain(mc_aflt);
				/* Once we hit a UE/CE or UE/OK case, done */
				return (1);
			}
		}

	} else {
		/*
		 * Addresses are different.  That means errors
		 * on the 2 banks are not related at all.
		 */
		for (i = 0; i < 2; i++) {
			if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) {
				flt_stat[i].mf_type = FLT_TYPE_CMPE;
				mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
				mc_aflt->mflt_nflts = 1;
				mc_aflt->mflt_stat[0] = &flt_stat[i];
				mc_aflt->mflt_pr = PR_UE;
				/*
				 * A compare error is the result of a MAC
				 * internal error, so simply log it instead
				 * of publishing an ereport.  SCF diagnoses
				 * all the MAC internal and its interface
				 * errors.
				 * mc_err_drain(mc_aflt);
				 */
				MC_LOG("cmpe error detected\n");
				/* no more report on this bank */
				flt_stat[i].mf_cntl = 0;
				rv = 1;
			}
		}

		/* rewrite can clear the one side UE error */

		for (i = 0; i < 2; i++) {
			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
				(void) do_rewrite(mcp,
				    flt_stat[i].mf_flt_maddr.ma_bank,
				    flt_stat[i].mf_flt_maddr.ma_dimm_addr,
				    0);
				flt_stat[i].mf_type = FLT_TYPE_UE;
				MAC_SET_ERRLOG_INFO(&flt_stat[i]);
				mc_aflt->mflt_erpt_class = MC_OPL_SUE;
				mc_aflt->mflt_stat[0] = &flt_stat[i];
				mc_aflt->mflt_nflts = 1;
				mc_aflt->mflt_pr = PR_MCE;
				mc_err_drain(mc_aflt);
				rv = 1;
			}
		}
	}
	return (rv);
}

static void
mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
{
	mc_aflt_t mc_aflt;
	mc_flt_stat_t flt_stat[2], mi_flt_stat[2];
	int i;
	int mi_valid;

	ASSERT(rsaddr);

	bzero(&mc_aflt, sizeof (mc_aflt_t));
	bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t));
	bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t));


	mc_aflt.mflt_mcp = mcp;
	mc_aflt.mflt_id = gethrtime();

	/* Now read all the registers into flt_stat */

	for (i = 0; i < 2; i++) {
		MC_LOG("Reading registers of bank %d\n", bank);
		/* patrol registers */
		mc_read_ptrl_reg(mcp, bank, &flt_stat[i]);

		/*
		 * In mirror mode, it is possible that only one bank
		 * may report the error.
		 * We need to check for it to
		 * ensure we pick the right addr value for patrol restart.
		 * Note that if both banks reported errors, we pick the
		 * 2nd one.  Both banks should report the same error address.
		 */
		if (flt_stat[i].mf_cntl & MAC_CNTL_PTRL_ERRS)
			rsaddr->mi_restartaddr = flt_stat[i].mf_flt_maddr;

		MC_LOG("ptrl registers cntl %x add %x log %x\n",
		    flt_stat[i].mf_cntl, flt_stat[i].mf_err_add,
		    flt_stat[i].mf_err_log);

		/* MI registers */
		mc_read_mi_reg(mcp, bank, &mi_flt_stat[i]);

		MC_LOG("MI registers cntl %x add %x log %x\n",
		    mi_flt_stat[i].mf_cntl, mi_flt_stat[i].mf_err_add,
		    mi_flt_stat[i].mf_err_log);

		bank = bank^1;
	}

	/* clear errors once we read all the registers */
	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));

	MAC_CLEAR_ERRS(mcp, bank ^ 1, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));

	/* Process MI errors first */

	/* if not error mode, cntl1 is 0 */
	if ((mi_flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
	    (mi_flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
		mi_flt_stat[0].mf_cntl = 0;

	if ((mi_flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
	    (mi_flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
		mi_flt_stat[1].mf_cntl = 0;

	mc_aflt.mflt_is_ptrl = 0;
	mi_valid = mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]);

	if ((((flt_stat[0].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[0].mf_cntl &
	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
	    (flt_stat[0].mf_err_add == mi_flt_stat[0].mf_err_add) &&
	    (((flt_stat[1].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[1].mf_cntl &
	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
	    (flt_stat[1].mf_err_add == mi_flt_stat[1].mf_err_add)) {
#ifdef DEBUG
		MC_LOG("discarding PTRL error because "
		    "it is the same as MI\n");
#endif
		rsaddr->mi_valid = mi_valid;
		return;
	}
	/* if not error mode, cntl1 is 0 */
	if ((flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
	    (flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
		flt_stat[0].mf_cntl = 0;

	if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
	    (flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
		flt_stat[1].mf_cntl = 0;

	mc_aflt.mflt_is_ptrl = 1;
	rsaddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]);
}
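
/*
 * Classify and report a single (non-mirror) bank error: UEs are drained
 * directly; CEs are first scrubbed by mc_scrub_ce() to decide whether they
 * are intermittent, permanent, or have turned into UEs.  Returns 1 if an
 * error was recognized and handled (the callers use this as the
 * restart-address validity flag), 0 otherwise, including when a rewrite
 * is still pending.
 */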
static int
mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt,
    mc_flt_stat_t *flt_stat)
{
	int ptrl_error = mc_aflt->mflt_is_ptrl;
	int rv = 0;

	mc_aflt->mflt_erpt_class = NULL;
	if (IS_UE(flt_stat->mf_cntl, ptrl_error)) {
		MC_LOG("UE detected\n");
		flt_stat->mf_type = FLT_TYPE_UE;
		mc_aflt->mflt_erpt_class = MC_OPL_UE;
		mc_aflt->mflt_pr = PR_UE;
		MAC_SET_ERRLOG_INFO(flt_stat);
		rv = 1;
	} else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) {
		MC_LOG("CE detected\n");
		MAC_SET_ERRLOG_INFO(flt_stat);

		/* Error type can change after scrubbing */
		mc_scrub_ce(mcp, bank, flt_stat, ptrl_error);
		if (MC_REWRITE_ACTIVE(mcp, bank)) {
			return (0);
		}

		if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE) {
			mc_aflt->mflt_erpt_class = MC_OPL_ICE;
			mc_aflt->mflt_pr = PR_MCE;
		} else if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
			mc_aflt->mflt_erpt_class = MC_OPL_CE;
			mc_aflt->mflt_pr = PR_MCE;
		} else if (flt_stat->mf_type == FLT_TYPE_UE) {
			mc_aflt->mflt_erpt_class = MC_OPL_UE;
			mc_aflt->mflt_pr = PR_UE;
		}
		rv = 1;
	}
	MC_LOG("mc_process_error: fault type %x erpt %s\n", flt_stat->mf_type,
	    mc_aflt->mflt_erpt_class);
	if (mc_aflt->mflt_erpt_class) {
		mc_aflt->mflt_stat[0] = flt_stat;
		mc_aflt->mflt_nflts = 1;
		mc_err_drain(mc_aflt);
	}
	return (rv);
}

static void
mc_error_handler(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
{
	mc_aflt_t mc_aflt;
	mc_flt_stat_t flt_stat, mi_flt_stat;
	int mi_valid;

	bzero(&mc_aflt, sizeof (mc_aflt_t));
	bzero(&flt_stat, sizeof (mc_flt_stat_t));
	bzero(&mi_flt_stat, sizeof (mc_flt_stat_t));

	mc_aflt.mflt_mcp = mcp;
	mc_aflt.mflt_id = gethrtime();

	/* patrol registers */
	mc_read_ptrl_reg(mcp, bank, &flt_stat);

	ASSERT(rsaddr);
	rsaddr->mi_restartaddr = flt_stat.mf_flt_maddr;

	MC_LOG("ptrl registers cntl %x add %x log %x\n", flt_stat.mf_cntl,
	    flt_stat.mf_err_add, flt_stat.mf_err_log);

	/* MI registers */
	mc_read_mi_reg(mcp, bank, &mi_flt_stat);


	MC_LOG("MI registers cntl %x add %x log %x\n", mi_flt_stat.mf_cntl,
	    mi_flt_stat.mf_err_add, mi_flt_stat.mf_err_log);

	/* clear errors once we read all the registers */
	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));

	mc_aflt.mflt_is_ptrl = 0;
	if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) &&
	    ((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
	    ((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
		mi_valid = mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat);
	}

	if ((((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) >>
	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat.mf_cntl &
	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
	    (flt_stat.mf_err_add == mi_flt_stat.mf_err_add)) {
#ifdef DEBUG
		MC_LOG("discarding PTRL error because "
		    "it is the same as MI\n");
#endif
		rsaddr->mi_valid = mi_valid;
		return;
	}

	mc_aflt.mflt_is_ptrl = 1;
	if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) &&
	    ((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
	    ((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
		rsaddr->mi_valid = mc_process_error(mcp, bank, &mc_aflt,
		    &flt_stat);
	}
}
/*
 * memory patrol error handling algorithm:
 * timeout() is used to do periodic polling
 * This is the flow chart.
 *	timeout ->
 *	mc_check_errors()
 *	    if memory bank is installed, read the status register
 *	    if any error bit is set,
 *	    -> mc_error_handler()
 *		-> read all error registers
 *		-> mc_process_error()
 *		    determine error type
 *		    rewrite to clear error or scrub to determine CE type
 *		    inform SCF on permanent CE
 *		-> mc_err_drain()
 *		    page offline processing
 *		    -> mc_ereport_post()
 */

static void
mc_process_rewrite(mc_opl_t *mcp, int bank)
{
	uint32_t rew_addr, cntl;
	mc_retry_info_t *retry;
	struct mc_bank *bankp;

	bankp = &(mcp->mc_bank[bank]);
	retry = bankp->mcb_active;
	if (retry == NULL)
		return;

	if (retry->ri_state <= RETRY_STATE_ACTIVE) {
		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
		if (cntl & MAC_CNTL_PTRL_STATUS)
			return;
		rew_addr = retry->ri_addr;
		ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), rew_addr);
		MAC_REW_REQ(mcp, bank);

		retry->ri_state = RETRY_STATE_REWRITE;
	}

	cntl = ldphysio(MAC_PTRL_CNTL(mcp, bank));

	if (cntl & MAC_CNTL_REW_END) {
		MAC_CLEAR_ERRS(mcp, bank,
		    MAC_CNTL_REW_ERRS);
		mc_clear_rewrite(mcp, bank);
	} else {
		/*
		 * If the rewrite does not complete in 1 hour, we have
		 * to consider this a HW failure.  However, there is no
		 * recovery mechanism.  The only thing we can do is to
		 * print a warning message to the console.  We continue
		 * to increment the counter but we only print the
		 * message once.  It will take the counter a long time
		 * to wrap around before the user might see a second
		 * message.  In practice, we have never hit this
		 * condition but we have to keep the code here just in
		 * case.
		 */
		if (++mcp->mc_bank[bank].mcb_rewrite_count
		    == mc_max_rewrite_retry) {
			cmn_err(CE_WARN, "Memory patrol feature is"
			    " partly suspended on /LSB%d/B%d"
			    " due to heavy memory load,"
			    " and it will restart"
			    " automatically.\n", mcp->mc_board_num,
			    bank);
		}
	}
}

static void
mc_check_errors_func(mc_opl_t *mcp)
{
	mc_rsaddr_info_t rsaddr_info;
	int i, error_count = 0;
	uint32_t stat, cntl;
	int running;
	int wrapped;
	int ebk;

	/*
	 * scan errors.
	 */
	if (mcp->mc_status & MC_MEMORYLESS)
		return;

	for (i = 0; i < BANKNUM_PER_SB; i++) {
		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
			if (MC_REWRITE_ACTIVE(mcp, i)) {
				mc_process_rewrite(mcp, i);
			}
			stat = ldphysio(MAC_PTRL_STAT(mcp, i));
			cntl = ldphysio(MAC_PTRL_CNTL(mcp, i));
			running = cntl & MAC_CNTL_PTRL_START;
			wrapped = cntl & MAC_CNTL_PTRL_ADD_MAX;

			/* Compute the effective bank idx */
			ebk = (IS_MIRROR(mcp, i)) ? MIRROR_IDX(i) : i;

			if (mc_debug_show_all || stat) {
				MC_LOG("/LSB%d/B%d stat %x cntl %x\n",
				    mcp->mc_board_num, i, stat, cntl);
			}

			/*
			 * Update stats and reset the flag if the HW
			 * patrol wrapped around in its scan.
			 */
			if (wrapped) {
				MAC_CLEAR_MAX(mcp, i);
				mcp->mc_period[ebk]++;
				if (IS_MIRROR(mcp, i)) {
					MC_LOG("mirror mc period %ld on "
					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
					    mcp->mc_board_num, i);
				} else {
					MC_LOG("mc period %ld on "
					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
					    mcp->mc_board_num, i);
				}
			}

			if (running) {
				/*
				 * Mac patrol HW is still running.
				 * Normally when an error is detected,
				 * the HW patrol will stop so that we
				 * can collect error data for reporting.
				 * Certain errors (MI errors), however,
				 * may not cause the HW patrol to stop,
				 * which is a problem since we cannot
				 * read error data while the HW patrol
				 * is running.  SW is not allowed to
				 * stop the HW patrol while it is
				 * running, as that may cause HW
				 * inconsistency.  This is described in
				 * a HW errata.
				 * In situations where we detect errors
				 * that do not cause the HW patrol to
				 * stop, we speed up the HW patrol
				 * scanning in the hope that it will
				 * find the 'real' PTRL errors
				 * associated with the previous errors,
				 * causing the HW to finally stop so
				 * that we can do the reporting.
				 */
				/*
				 * Check to see if we did speed up the
				 * HW patrol due to previous errors
				 * detected that did not cause the
				 * patrol to stop.  We only do it if the
				 * HW patrol scan wrapped (counted as
				 * completing a 'period').
				 */
				if (mcp->mc_speedup_period[ebk] > 0) {
					if (wrapped &&
					    (--mcp->mc_speedup_period[ebk] ==
					    0)) {
						/*
						 * We did try to speed up.
						 * The speed up period has
						 * expired and the HW patrol
						 * is still running.  The
						 * errors must be intermittent.
						 * We have no choice but to
						 * ignore them, reset the scan
						 * speed to normal and clear
						 * the MI error bits.  For
						 * mirror mode, we need to
						 * clear errors on both banks.
						 */
						MC_LOG("Clearing MI errors\n");
						MAC_CLEAR_ERRS(mcp, i,
						    MAC_CNTL_MI_ERRS);

						if (IS_MIRROR(mcp, i)) {
							MC_LOG("Clearing "
							    "Mirror MI errs\n");
							MAC_CLEAR_ERRS(mcp,
							    i^1,
							    MAC_CNTL_MI_ERRS);
						}
					}
				} else if (stat & MAC_STAT_MI_ERRS) {
					/*
					 * MI errors were detected but we
					 * cannot report them while the HW
					 * patrol is still running.
					 * We attempt to speed up the
					 * scanning in the hope that the HW
					 * will detect PTRL errors at the
					 * same location, which will cause
					 * the HW patrol to stop.
					 */
					mcp->mc_speedup_period[ebk] = 2;
					MAC_CMD(mcp, i, 0);
				}
			} else if (stat & (MAC_STAT_PTRL_ERRS |
			    MAC_STAT_MI_ERRS)) {
				/*
				 * HW patrol has stopped and we found errors.
				 * Proceed to collect and report error info.
				 */
				mcp->mc_speedup_period[ebk] = 0;
				rsaddr_info.mi_valid = 0;
				rsaddr_info.mi_injectrestart = 0;
				if (IS_MIRROR(mcp, i)) {
					mc_error_handler_mir(mcp, i,
					    &rsaddr_info);
				} else {
					mc_error_handler(mcp, i, &rsaddr_info);
				}

				error_count++;
				restart_patrol(mcp, i, &rsaddr_info);
			} else {
				/*
				 * The HW patrol scan has apparently stopped
				 * but no errors were detected/flagged.
				 * Restart the HW patrol just to be sure.
				 * In mirror mode, the odd bank might have
				 * reported errors that caused the patrol to
				 * stop.  We'll defer the restart to the odd
				 * bank in this case.
				 */
				if (!IS_MIRROR(mcp, i) || (i & 0x1))
					restart_patrol(mcp, i, NULL);
			}
		}
	}
	if (error_count > 0)
		mcp->mc_last_error += error_count;
	else
		mcp->mc_last_error = 0;
}

/*
 * mc_polling -- Check errors for only one instance, but process errors
 * for all instances to make sure we drain the errors faster than they
 * can accumulate.
 *
 * Polling on each board should be done only once per
 * mc_patrol_interval_sec.
 * This is equivalent to setting mc_tick_left to OPL_MAX_BOARDS and
 * decrementing it by 1 on each timeout.  Once mc_tick_left becomes
 * negative, the board becomes a candidate for polling because it has
 * waited at least mc_patrol_interval_sec seconds.  If
 * mc_timeout_period is calculated differently, this has to be updated
 * accordingly.
 */

static void
mc_polling(void)
{
	int i, scan_error;
	mc_opl_t *mcp;

	scan_error = 1;
	for (i = 0; i < OPL_MAX_BOARDS; i++) {
		mutex_enter(&mcmutex);
		if ((mcp = mc_instances[i]) == NULL) {
			mutex_exit(&mcmutex);
			continue;
		}
		mutex_enter(&mcp->mc_lock);
		mutex_exit(&mcmutex);
		if (!(mcp->mc_status & MC_POLL_RUNNING)) {
			mutex_exit(&mcp->mc_lock);
			continue;
		}
		if (scan_error && mcp->mc_tick_left <= 0) {
			mc_check_errors_func(mcp);
			mcp->mc_tick_left = OPL_MAX_BOARDS;
			scan_error = 0;
		} else {
			mcp->mc_tick_left--;
		}
		mc_process_scf_log(mcp);
		mutex_exit(&mcp->mc_lock);
	}
}

static void
get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr)
{
	maddr->ma_bd = mcp->mc_board_num;
	maddr->ma_bank = bank;
	maddr->ma_dimm_addr = 0;
}

typedef struct mc_mem_range {
	uint64_t	addr;
	uint64_t	size;
} mc_mem_range_t;

static int
get_base_address(mc_opl_t *mcp)
{
	mc_mem_range_t *mem_range;
	int len;

	if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
	    "sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	mcp->mc_start_address = mem_range->addr;
	mcp->mc_size = mem_range->size;

	kmem_free(mem_range, len);
	return (DDI_SUCCESS);
}

struct mc_addr_spec {
	uint32_t bank;
	uint32_t phys_hi;
	uint32_t phys_lo;
};

#define	REGS_PA(m, i) ((((uint64_t)m[i].phys_hi)<<32) | m[i].phys_lo)

static char *mc_tbl_name[] = {
	"cs0-mc-pa-trans-table",
	"cs1-mc-pa-trans-table"
};

/*
 * This routine performs a range check for a given PA
 * to see if it belongs to the memory range for this board.
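 * (A PA belongs to this board exactly when
 * mc_start_address <= pa < mc_start_address + mc_size.)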
2352 * Return 1 if it is valid (within the range) and 0 otherwise 2353 */ 2354 static int 2355 mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa) 2356 { 2357 if ((pa < mcp->mc_start_address) || (mcp->mc_start_address + 2358 mcp->mc_size <= pa)) 2359 return (0); 2360 else 2361 return (1); 2362 } 2363 2364 static void 2365 mc_memlist_delete(struct memlist *mlist) 2366 { 2367 struct memlist *ml; 2368 2369 for (ml = mlist; ml; ml = mlist) { 2370 mlist = ml->next; 2371 kmem_free(ml, sizeof (struct memlist)); 2372 } 2373 } 2374 2375 static struct memlist * 2376 mc_memlist_dup(struct memlist *mlist) 2377 { 2378 struct memlist *hl = NULL, *tl, **mlp; 2379 2380 if (mlist == NULL) 2381 return (NULL); 2382 2383 mlp = &hl; 2384 tl = *mlp; 2385 for (; mlist; mlist = mlist->next) { 2386 *mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP); 2387 (*mlp)->address = mlist->address; 2388 (*mlp)->size = mlist->size; 2389 (*mlp)->prev = tl; 2390 tl = *mlp; 2391 mlp = &((*mlp)->next); 2392 } 2393 *mlp = NULL; 2394 2395 return (hl); 2396 } 2397 2398 2399 static struct memlist * 2400 mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len) 2401 { 2402 uint64_t end; 2403 struct memlist *ml, *tl, *nlp; 2404 2405 if (mlist == NULL) 2406 return (NULL); 2407 2408 end = base + len; 2409 if ((end <= mlist->address) || (base == end)) 2410 return (mlist); 2411 2412 for (tl = ml = mlist; ml; tl = ml, ml = nlp) { 2413 uint64_t mend; 2414 2415 nlp = ml->next; 2416 2417 if (end <= ml->address) 2418 break; 2419 2420 mend = ml->address + ml->size; 2421 if (base < mend) { 2422 if (base <= ml->address) { 2423 ml->address = end; 2424 if (end >= mend) 2425 ml->size = 0ull; 2426 else 2427 ml->size = mend - ml->address; 2428 } else { 2429 ml->size = base - ml->address; 2430 if (end < mend) { 2431 struct memlist *nl; 2432 /* 2433 * splitting an memlist entry. 
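				 * For example, deleting the span
				 * [base, end) from an entry that covers
				 * [address, address + size) with
				 * address < base and end < address + size
				 * shrinks that entry to [address, base)
				 * and links a new entry covering
				 * [end, address + size) right after it.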
2434 */ 2435 nl = kmem_alloc(sizeof (struct memlist), 2436 KM_SLEEP); 2437 nl->address = end; 2438 nl->size = mend - nl->address; 2439 if ((nl->next = nlp) != NULL) 2440 nlp->prev = nl; 2441 nl->prev = ml; 2442 ml->next = nl; 2443 nlp = nl; 2444 } 2445 } 2446 if (ml->size == 0ull) { 2447 if (ml == mlist) { 2448 if ((mlist = nlp) != NULL) 2449 nlp->prev = NULL; 2450 kmem_free(ml, sizeof (struct memlist)); 2451 if (mlist == NULL) 2452 break; 2453 ml = nlp; 2454 } else { 2455 if ((tl->next = nlp) != NULL) 2456 nlp->prev = tl; 2457 kmem_free(ml, sizeof (struct memlist)); 2458 ml = tl; 2459 } 2460 } 2461 } 2462 } 2463 2464 return (mlist); 2465 } 2466 2467 static void 2468 mc_get_mlist(mc_opl_t *mcp) 2469 { 2470 struct memlist *mlist; 2471 2472 memlist_read_lock(); 2473 mlist = mc_memlist_dup(phys_install); 2474 memlist_read_unlock(); 2475 2476 if (mlist) { 2477 mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address); 2478 } 2479 2480 if (mlist) { 2481 uint64_t startpa, endpa; 2482 2483 startpa = mcp->mc_start_address + mcp->mc_size; 2484 endpa = ptob(physmax + 1); 2485 if (endpa > startpa) { 2486 mlist = mc_memlist_del_span(mlist, startpa, 2487 endpa - startpa); 2488 } 2489 } 2490 2491 if (mlist) { 2492 mcp->mlist = mlist; 2493 } 2494 } 2495 2496 int 2497 mc_board_add(mc_opl_t *mcp) 2498 { 2499 struct mc_addr_spec *macaddr; 2500 cs_status_t *cs_status; 2501 int len, len1, i, bk, cc; 2502 mc_rsaddr_info_t rsaddr; 2503 uint32_t mirr; 2504 int nbanks = 0; 2505 uint64_t nbytes = 0; 2506 int mirror_mode = 0; 2507 int ret; 2508 2509 /* 2510 * Get configurations from "pseudo-mc" node which includes: 2511 * board# : LSB number 2512 * mac-addr : physical base address of MAC registers 2513 * csX-mac-pa-trans-table: translation table from DIMM address 2514 * to physical address or vice versa. 2515 */ 2516 mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip, 2517 DDI_PROP_DONTPASS, "board#", -1); 2518 2519 if (mcp->mc_board_num == -1) { 2520 return (DDI_FAILURE); 2521 } 2522 2523 /* 2524 * Get start address in this CAB. It can be gotten from 2525 * "sb-mem-ranges" property. 2526 */ 2527 2528 if (get_base_address(mcp) == DDI_FAILURE) { 2529 return (DDI_FAILURE); 2530 } 2531 /* get mac-pa trans tables */ 2532 for (i = 0; i < MC_TT_CS; i++) { 2533 len = MC_TT_ENTRIES; 2534 cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip, 2535 DDI_PROP_DONTPASS, mc_tbl_name[i], 2536 (caddr_t)mcp->mc_trans_table[i], &len); 2537 2538 if (cc != DDI_SUCCESS) { 2539 bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES); 2540 } 2541 } 2542 mcp->mlist = NULL; 2543 2544 mc_get_mlist(mcp); 2545 2546 /* initialize bank informations */ 2547 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2548 "mc-addr", (caddr_t)&macaddr, &len); 2549 if (cc != DDI_SUCCESS) { 2550 cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc); 2551 return (DDI_FAILURE); 2552 } 2553 2554 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2555 "cs-status", (caddr_t)&cs_status, &len1); 2556 2557 if (cc != DDI_SUCCESS) { 2558 if (len > 0) 2559 kmem_free(macaddr, len); 2560 cmn_err(CE_WARN, "Cannot get cs-status. 
err=%d\n", cc);
		return (DDI_FAILURE);
	}
	/* get the physical board number for a given logical board number */
	mcp->mc_phys_board_num = mc_opl_get_physical_board(mcp->mc_board_num);

	if (mcp->mc_phys_board_num < 0) {
		if (len > 0)
			kmem_free(macaddr, len);
		cmn_err(CE_WARN, "Unable to obtain the physical board number");
		return (DDI_FAILURE);
	}

	mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL);

	for (i = 0; i < len1 / sizeof (cs_status_t); i++) {
		nbytes += ((uint64_t)cs_status[i].cs_avail_hi << 32) |
		    ((uint64_t)cs_status[i].cs_avail_low);
	}
	if (len1 > 0)
		kmem_free(cs_status, len1);
	nbanks = len / sizeof (struct mc_addr_spec);

	if (nbanks > 0)
		nbytes /= nbanks;
	else {
		/* No need to free macaddr because len must be 0 */
		mcp->mc_status |= MC_MEMORYLESS;
		return (DDI_SUCCESS);
	}

	for (i = 0; i < BANKNUM_PER_SB; i++) {
		mcp->mc_scf_retry[i] = 0;
		mcp->mc_period[i] = 0;
		mcp->mc_speedup_period[i] = 0;
	}

	/*
	 * Get the memory size here.  Let it be B (bytes).
	 * Let T be the time in microseconds to scan 64 bytes.
	 * If we want to complete 1 round of scanning in P seconds,
	 *
	 *	B * T * 10^(-6)
	 *	--------------- = P
	 *	      64
	 *
	 *	    P * 64 * 10^6
	 *	T = -------------
	 *	          B
	 *
	 * The timing bits are set in PTRL_CNTL[28:26] where
	 *
	 *	000 - 1 ms
	 *	001 - 512 us
	 *	010 - 256 us
	 *	011 - 128 us
	 *	100 - 64 us
	 *	101 - 32 us
	 *	110 - 0 us
	 *	111 - reserved
	 *
	 * a[0] = 110, a[1] = 101, ... a[6] = 0
	 *
	 * The cs-status property is int x 7:
	 *	0 - cs#
	 *	1 - cs-status
	 *	2 - cs-avail.hi
	 *	3 - cs-avail.lo
	 *	4 - dimm-capa.hi
	 *	5 - dimm-capa.lo
	 *	6 - # of dimms
	 */

	if (nbytes > 0) {
		int i;
		uint64_t ms;
		ms = ((uint64_t)mc_scan_period * 64 * 1000000) / nbytes;
		mcp->mc_speed = mc_scan_speeds[MC_MAX_SPEEDS - 1].mc_speeds;
		for (i = 0; i < MC_MAX_SPEEDS - 1; i++) {
			if (ms < mc_scan_speeds[i + 1].mc_period) {
				mcp->mc_speed = mc_scan_speeds[i].mc_speeds;
				break;
			}
		}
	} else
		mcp->mc_speed = 0;

	for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) {
		struct mc_bank *bankp;
		mc_retry_info_t *retry;
		uint32_t reg;
		int k;

		/*
		 * setup bank
		 */
		bk = macaddr[i].bank;
		bankp = &(mcp->mc_bank[bk]);
		bankp->mcb_status = BANK_INSTALLED;
		bankp->mcb_reg_base = REGS_PA(macaddr, i);

		bankp->mcb_retry_freelist = NULL;
		bankp->mcb_retry_pending = NULL;
		bankp->mcb_active = NULL;
		retry = &bankp->mcb_retry_infos[0];
		for (k = 0; k < MC_RETRY_COUNT; k++, retry++) {
			mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
		}

		reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk));
		bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS);

		/*
		 * check if mirror mode
		 */
		mirr = LD_MAC_REG(MAC_MIRR(mcp, bk));

		if (mirr & MAC_MIRR_MIRROR_MODE) {
			MC_LOG("Mirror -> /LSB%d/B%d\n", mcp->mc_board_num,
			    bk);
			bankp->mcb_status |= BANK_MIRROR_MODE;
			mirror_mode = 1;
			/*
			 * The following bit is only used for
			 * error injection.
We should clear it 2691 */ 2692 if (mirr & MAC_MIRR_BANK_EXCLUSIVE) 2693 ST_MAC_REG(MAC_MIRR(mcp, bk), 0); 2694 } 2695 2696 /* 2697 * restart if not mirror mode or the other bank 2698 * of the mirror is not running 2699 */ 2700 if (!(mirr & MAC_MIRR_MIRROR_MODE) || 2701 !(mcp->mc_bank[bk^1].mcb_status & BANK_PTRL_RUNNING)) { 2702 MC_LOG("Starting up /LSB%d/B%d\n", mcp->mc_board_num, 2703 bk); 2704 get_ptrl_start_address(mcp, bk, &rsaddr.mi_restartaddr); 2705 rsaddr.mi_valid = 0; 2706 rsaddr.mi_injectrestart = 0; 2707 restart_patrol(mcp, bk, &rsaddr); 2708 } else { 2709 MC_LOG("Not starting up /LSB%d/B%d\n", 2710 mcp->mc_board_num, bk); 2711 } 2712 bankp->mcb_status |= BANK_PTRL_RUNNING; 2713 } 2714 if (len > 0) 2715 kmem_free(macaddr, len); 2716 2717 ret = ndi_prop_update_int(DDI_DEV_T_NONE, mcp->mc_dip, "mirror-mode", 2718 mirror_mode); 2719 if (ret != DDI_PROP_SUCCESS) { 2720 cmn_err(CE_WARN, "Unable to update mirror-mode property"); 2721 } 2722 2723 mcp->mc_dimm_list = mc_get_dimm_list(mcp); 2724 2725 /* 2726 * set interval in HZ. 2727 */ 2728 mcp->mc_last_error = 0; 2729 2730 /* restart memory patrol checking */ 2731 mcp->mc_status |= MC_POLL_RUNNING; 2732 2733 return (DDI_SUCCESS); 2734 } 2735 2736 int 2737 mc_board_del(mc_opl_t *mcp) 2738 { 2739 int i; 2740 scf_log_t *p; 2741 2742 /* 2743 * cleanup mac state 2744 */ 2745 mutex_enter(&mcp->mc_lock); 2746 if (mcp->mc_status & MC_MEMORYLESS) { 2747 mutex_exit(&mcp->mc_lock); 2748 mutex_destroy(&mcp->mc_lock); 2749 return (DDI_SUCCESS); 2750 } 2751 for (i = 0; i < BANKNUM_PER_SB; i++) { 2752 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2753 mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED; 2754 } 2755 } 2756 2757 /* stop memory patrol checking */ 2758 mcp->mc_status &= ~MC_POLL_RUNNING; 2759 2760 /* just throw away all the scf logs */ 2761 for (i = 0; i < BANKNUM_PER_SB; i++) { 2762 while ((p = mcp->mc_scf_log[i]) != NULL) { 2763 mcp->mc_scf_log[i] = p->sl_next; 2764 mcp->mc_scf_total[i]--; 2765 kmem_free(p, sizeof (scf_log_t)); 2766 } 2767 } 2768 2769 if (mcp->mlist) 2770 mc_memlist_delete(mcp->mlist); 2771 2772 if (mcp->mc_dimm_list) 2773 mc_free_dimm_list(mcp->mc_dimm_list); 2774 2775 mutex_exit(&mcp->mc_lock); 2776 2777 mutex_destroy(&mcp->mc_lock); 2778 return (DDI_SUCCESS); 2779 } 2780 2781 int 2782 mc_suspend(mc_opl_t *mcp, uint32_t flag) 2783 { 2784 /* stop memory patrol checking */ 2785 mutex_enter(&mcp->mc_lock); 2786 if (mcp->mc_status & MC_MEMORYLESS) { 2787 mutex_exit(&mcp->mc_lock); 2788 return (DDI_SUCCESS); 2789 } 2790 2791 mcp->mc_status &= ~MC_POLL_RUNNING; 2792 2793 mcp->mc_status |= flag; 2794 mutex_exit(&mcp->mc_lock); 2795 2796 return (DDI_SUCCESS); 2797 } 2798 2799 void 2800 opl_mc_update_mlist(void) 2801 { 2802 int i; 2803 mc_opl_t *mcp; 2804 2805 /* 2806 * memory information is not updated until 2807 * the post attach/detach stage during DR. 2808 * This interface is used by dr_mem to inform 2809 * mc-opl to update the mlist. 
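	 * All registered instances are walked under mcmutex, and each
	 * instance's cached mlist is rebuilt from phys_install while
	 * that instance's mc_lock is held.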
2810 */ 2811 2812 mutex_enter(&mcmutex); 2813 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2814 if ((mcp = mc_instances[i]) == NULL) 2815 continue; 2816 mutex_enter(&mcp->mc_lock); 2817 if (mcp->mlist) 2818 mc_memlist_delete(mcp->mlist); 2819 mcp->mlist = NULL; 2820 mc_get_mlist(mcp); 2821 mutex_exit(&mcp->mc_lock); 2822 } 2823 mutex_exit(&mcmutex); 2824 } 2825 2826 /* caller must clear the SUSPEND bits or this will do nothing */ 2827 2828 int 2829 mc_resume(mc_opl_t *mcp, uint32_t flag) 2830 { 2831 int i; 2832 uint64_t basepa; 2833 2834 mutex_enter(&mcp->mc_lock); 2835 if (mcp->mc_status & MC_MEMORYLESS) { 2836 mutex_exit(&mcp->mc_lock); 2837 return (DDI_SUCCESS); 2838 } 2839 basepa = mcp->mc_start_address; 2840 if (get_base_address(mcp) == DDI_FAILURE) { 2841 mutex_exit(&mcp->mc_lock); 2842 return (DDI_FAILURE); 2843 } 2844 2845 if (basepa != mcp->mc_start_address) { 2846 if (mcp->mlist) 2847 mc_memlist_delete(mcp->mlist); 2848 mcp->mlist = NULL; 2849 mc_get_mlist(mcp); 2850 } 2851 2852 mcp->mc_status &= ~flag; 2853 2854 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 2855 mutex_exit(&mcp->mc_lock); 2856 return (DDI_SUCCESS); 2857 } 2858 2859 if (!(mcp->mc_status & MC_POLL_RUNNING)) { 2860 /* restart memory patrol checking */ 2861 mcp->mc_status |= MC_POLL_RUNNING; 2862 for (i = 0; i < BANKNUM_PER_SB; i++) { 2863 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2864 mc_check_errors_func(mcp); 2865 } 2866 } 2867 } 2868 mutex_exit(&mcp->mc_lock); 2869 2870 return (DDI_SUCCESS); 2871 } 2872 2873 static mc_opl_t * 2874 mc_pa_to_mcp(uint64_t pa) 2875 { 2876 mc_opl_t *mcp; 2877 int i; 2878 2879 ASSERT(MUTEX_HELD(&mcmutex)); 2880 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2881 if ((mcp = mc_instances[i]) == NULL) 2882 continue; 2883 /* if mac patrol is suspended, we cannot rely on it */ 2884 if (!(mcp->mc_status & MC_POLL_RUNNING) || 2885 (mcp->mc_status & MC_SOFT_SUSPENDED)) 2886 continue; 2887 if (mc_rangecheck_pa(mcp, pa)) { 2888 return (mcp); 2889 } 2890 } 2891 return (NULL); 2892 } 2893 2894 /* 2895 * Get Physical Board number from Logical one. 2896 */ 2897 static int 2898 mc_opl_get_physical_board(int sb) 2899 { 2900 if (&opl_get_physical_board) { 2901 return (opl_get_physical_board(sb)); 2902 } 2903 2904 cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n"); 2905 return (-1); 2906 } 2907 2908 /* ARGSUSED */ 2909 int 2910 mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen, 2911 int *lenp) 2912 { 2913 int i; 2914 int j; 2915 int sb; 2916 int bank; 2917 int cs; 2918 mc_opl_t *mcp; 2919 char memb_num; 2920 2921 mutex_enter(&mcmutex); 2922 2923 if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) || 2924 (!pa_is_valid(mcp, flt_addr))) { 2925 mutex_exit(&mcmutex); 2926 if (snprintf(buf, buflen, "UNKNOWN") >= buflen) { 2927 return (ENOSPC); 2928 } else { 2929 if (lenp) 2930 *lenp = strlen(buf); 2931 } 2932 return (0); 2933 } 2934 2935 bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address); 2936 sb = mcp->mc_phys_board_num; 2937 cs = pa_to_cs(mcp, flt_addr - mcp->mc_start_address); 2938 2939 if (sb == -1) { 2940 mutex_exit(&mcmutex); 2941 return (ENXIO); 2942 } 2943 2944 if (plat_model == MODEL_DC) { 2945 i = BD_BK_SLOT_TO_INDEX(0, bank, 0); 2946 j = (cs == 0) ? i : i + 2; 2947 snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s", 2948 model_names[plat_model].unit_name, sb, 2949 mc_dc_dimm_unum_table[j], 2950 mc_dc_dimm_unum_table[j + 1]); 2951 } else { 2952 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 2953 j = (cs == 0) ? 
i : i + 2; 2954 memb_num = mc_ff_dimm_unum_table[i][0]; 2955 snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s", 2956 model_names[plat_model].unit_name, 2957 model_names[plat_model].mem_name, memb_num, 2958 &mc_ff_dimm_unum_table[j][1], 2959 &mc_ff_dimm_unum_table[j + 1][1]); 2960 } 2961 if (lenp) { 2962 *lenp = strlen(buf); 2963 } 2964 mutex_exit(&mcmutex); 2965 return (0); 2966 } 2967 2968 int 2969 opl_mc_suspend(void) 2970 { 2971 mc_opl_t *mcp; 2972 int i; 2973 2974 mutex_enter(&mcmutex); 2975 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2976 if ((mcp = mc_instances[i]) == NULL) 2977 continue; 2978 mc_suspend(mcp, MC_SOFT_SUSPENDED); 2979 } 2980 mutex_exit(&mcmutex); 2981 2982 return (0); 2983 } 2984 2985 int 2986 opl_mc_resume(void) 2987 { 2988 mc_opl_t *mcp; 2989 int i; 2990 2991 mutex_enter(&mcmutex); 2992 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2993 if ((mcp = mc_instances[i]) == NULL) 2994 continue; 2995 mc_resume(mcp, MC_SOFT_SUSPENDED); 2996 } 2997 mutex_exit(&mcmutex); 2998 2999 return (0); 3000 } 3001 static void 3002 insert_mcp(mc_opl_t *mcp) 3003 { 3004 mutex_enter(&mcmutex); 3005 if (mc_instances[mcp->mc_board_num] != NULL) { 3006 MC_LOG("mc-opl instance for board# %d already exists\n", 3007 mcp->mc_board_num); 3008 } 3009 mc_instances[mcp->mc_board_num] = mcp; 3010 mutex_exit(&mcmutex); 3011 } 3012 3013 static void 3014 delete_mcp(mc_opl_t *mcp) 3015 { 3016 mutex_enter(&mcmutex); 3017 mc_instances[mcp->mc_board_num] = 0; 3018 mutex_exit(&mcmutex); 3019 } 3020 3021 /* Error injection interface */ 3022 3023 static void 3024 mc_lock_va(uint64_t pa, caddr_t new_va) 3025 { 3026 tte_t tte; 3027 3028 vtag_flushpage(new_va, (uint64_t)ksfmmup); 3029 sfmmu_memtte(&tte, pa >> PAGESHIFT, PROC_DATA|HAT_NOSYNC, TTE8K); 3030 tte.tte_intlo |= TTE_LCK_INT; 3031 sfmmu_dtlb_ld_kva(new_va, &tte); 3032 } 3033 3034 static void 3035 mc_unlock_va(caddr_t va) 3036 { 3037 vtag_flushpage(va, (uint64_t)ksfmmup); 3038 } 3039 3040 /* ARGSUSED */ 3041 int 3042 mc_inject_error(int error_type, uint64_t pa, uint32_t flags) 3043 { 3044 mc_opl_t *mcp; 3045 int bank; 3046 uint32_t dimm_addr; 3047 uint32_t cntl; 3048 mc_rsaddr_info_t rsaddr; 3049 uint32_t data, stat; 3050 int both_sides = 0; 3051 uint64_t pa0; 3052 int extra_injection_needed = 0; 3053 extern void cpu_flush_ecache(void); 3054 3055 MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags); 3056 3057 mutex_enter(&mcmutex); 3058 if ((mcp = mc_pa_to_mcp(pa)) == NULL) { 3059 mutex_exit(&mcmutex); 3060 MC_LOG("mc_inject_error: invalid pa\n"); 3061 return (ENOTSUP); 3062 } 3063 3064 mutex_enter(&mcp->mc_lock); 3065 mutex_exit(&mcmutex); 3066 3067 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 3068 mutex_exit(&mcp->mc_lock); 3069 MC_LOG("mc-opl has been suspended. 
No error injection.\n"); 3070 return (EBUSY); 3071 } 3072 3073 /* convert pa to offset within the board */ 3074 MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address); 3075 3076 if (!pa_is_valid(mcp, pa)) { 3077 mutex_exit(&mcp->mc_lock); 3078 return (EINVAL); 3079 } 3080 3081 pa0 = pa - mcp->mc_start_address; 3082 3083 bank = pa_to_bank(mcp, pa0); 3084 3085 if (flags & MC_INJECT_FLAG_OTHER) 3086 bank = bank ^ 1; 3087 3088 if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) { 3089 mutex_exit(&mcp->mc_lock); 3090 MC_LOG("Not mirror mode\n"); 3091 return (EINVAL); 3092 } 3093 3094 dimm_addr = pa_to_dimm(mcp, pa0); 3095 3096 MC_LOG("injecting error to /LSB%d/B%d/%x\n", mcp->mc_board_num, bank, 3097 dimm_addr); 3098 3099 3100 switch (error_type) { 3101 case MC_INJECT_INTERMITTENT_MCE: 3102 case MC_INJECT_PERMANENT_MCE: 3103 case MC_INJECT_MUE: 3104 both_sides = 1; 3105 } 3106 3107 if (flags & MC_INJECT_FLAG_RESET) 3108 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0); 3109 3110 ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK); 3111 3112 if (both_sides) { 3113 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0); 3114 ST_MAC_REG(MAC_EG_ADD(mcp, bank^1), dimm_addr & 3115 MAC_EG_ADD_MASK); 3116 } 3117 3118 switch (error_type) { 3119 case MC_INJECT_SUE: 3120 extra_injection_needed = 1; 3121 /*FALLTHROUGH*/ 3122 case MC_INJECT_UE: 3123 case MC_INJECT_MUE: 3124 if (flags & MC_INJECT_FLAG_PATH) { 3125 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 | 3126 MAC_EG_FORCE_READ16 | MAC_EG_RDERR_ONCE; 3127 } else { 3128 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR00 | 3129 MAC_EG_FORCE_DERR16 | MAC_EG_DERR_ONCE; 3130 } 3131 flags |= MC_INJECT_FLAG_ST; 3132 break; 3133 case MC_INJECT_INTERMITTENT_CE: 3134 case MC_INJECT_INTERMITTENT_MCE: 3135 if (flags & MC_INJECT_FLAG_PATH) { 3136 cntl = MAC_EG_ADD_FIX |MAC_EG_FORCE_READ00 | 3137 MAC_EG_RDERR_ONCE; 3138 } else { 3139 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 | 3140 MAC_EG_DERR_ONCE; 3141 } 3142 extra_injection_needed = 1; 3143 flags |= MC_INJECT_FLAG_ST; 3144 break; 3145 case MC_INJECT_PERMANENT_CE: 3146 case MC_INJECT_PERMANENT_MCE: 3147 if (flags & MC_INJECT_FLAG_PATH) { 3148 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 | 3149 MAC_EG_RDERR_ALWAYS; 3150 } else { 3151 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 | 3152 MAC_EG_DERR_ALWAYS; 3153 } 3154 flags |= MC_INJECT_FLAG_ST; 3155 break; 3156 case MC_INJECT_CMPE: 3157 data = 0xabcdefab; 3158 stphys(pa, data); 3159 cpu_flush_ecache(); 3160 MC_LOG("CMPE: writing data %x to %lx\n", data, pa); 3161 ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE); 3162 stphys(pa, data ^ 0xffffffff); 3163 membar_sync(); 3164 cpu_flush_ecache(); 3165 ST_MAC_REG(MAC_MIRR(mcp, bank), 0); 3166 MC_LOG("CMPE: write new data %xto %lx\n", data, pa); 3167 cntl = 0; 3168 break; 3169 case MC_INJECT_NOP: 3170 cntl = 0; 3171 break; 3172 default: 3173 MC_LOG("mc_inject_error: invalid option\n"); 3174 cntl = 0; 3175 } 3176 3177 if (cntl) { 3178 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK); 3179 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 3180 3181 if (both_sides) { 3182 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 3183 MAC_EG_SETUP_MASK); 3184 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 3185 } 3186 } 3187 3188 /* 3189 * For all injection cases except compare error, we 3190 * must write to the PA to trigger the error. 
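	 * (The store below, followed by cpu_flush_ecache(), is intended
	 * to push the modified line out of the caches so that the MAC
	 * observes the access.)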
3191 */ 3192 3193 if (flags & MC_INJECT_FLAG_ST) { 3194 data = 0xf0e0d0c0; 3195 MC_LOG("Writing %x to %lx\n", data, pa); 3196 stphys(pa, data); 3197 cpu_flush_ecache(); 3198 } 3199 3200 3201 if (flags & MC_INJECT_FLAG_LD) { 3202 if (flags & MC_INJECT_FLAG_PREFETCH) { 3203 /* 3204 * Use strong prefetch operation to 3205 * inject MI errors. 3206 */ 3207 page_t *pp; 3208 extern void mc_prefetch(caddr_t); 3209 3210 MC_LOG("prefetch\n"); 3211 3212 pp = page_numtopp_nolock(pa >> PAGESHIFT); 3213 if (pp != NULL) { 3214 caddr_t va, va1; 3215 3216 va = ppmapin(pp, PROT_READ|PROT_WRITE, 3217 (caddr_t)-1); 3218 kpreempt_disable(); 3219 mc_lock_va((uint64_t)pa, va); 3220 va1 = va + (pa & (PAGESIZE - 1)); 3221 mc_prefetch(va1); 3222 mc_unlock_va(va); 3223 kpreempt_enable(); 3224 ppmapout(va); 3225 3226 /* 3227 * For MI errors, we need one extra 3228 * injection for HW patrol to stop. 3229 */ 3230 extra_injection_needed = 1; 3231 } else { 3232 cmn_err(CE_WARN, "Cannot find page structure" 3233 " for PA %lx\n", pa); 3234 } 3235 } else { 3236 MC_LOG("Reading from %lx\n", pa); 3237 data = ldphys(pa); 3238 MC_LOG("data = %x\n", data); 3239 } 3240 3241 if (extra_injection_needed) { 3242 /* 3243 * These are the injection cases where the 3244 * requested injected errors will not cause the HW 3245 * patrol to stop. For these cases, we need to inject 3246 * an extra 'real' PTRL error to force the 3247 * HW patrol to stop so that we can report the 3248 * errors injected. Note that we cannot read 3249 * and report error status while the HW patrol 3250 * is running. 3251 */ 3252 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 3253 cntl & MAC_EG_SETUP_MASK); 3254 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 3255 3256 if (both_sides) { 3257 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 3258 MAC_EG_SETUP_MASK); 3259 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 3260 } 3261 data = 0xf0e0d0c0; 3262 MC_LOG("Writing %x to %lx\n", data, pa); 3263 stphys(pa, data); 3264 cpu_flush_ecache(); 3265 } 3266 } 3267 3268 if (flags & MC_INJECT_FLAG_RESTART) { 3269 MC_LOG("Restart patrol\n"); 3270 rsaddr.mi_restartaddr.ma_bd = mcp->mc_board_num; 3271 rsaddr.mi_restartaddr.ma_bank = bank; 3272 rsaddr.mi_restartaddr.ma_dimm_addr = dimm_addr; 3273 rsaddr.mi_valid = 1; 3274 rsaddr.mi_injectrestart = 1; 3275 restart_patrol(mcp, bank, &rsaddr); 3276 } 3277 3278 if (flags & MC_INJECT_FLAG_POLL) { 3279 int running; 3280 int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank; 3281 3282 MC_LOG("Poll patrol error\n"); 3283 stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank)); 3284 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 3285 running = cntl & MAC_CNTL_PTRL_START; 3286 3287 if (!running && 3288 (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS))) { 3289 /* 3290 * HW patrol stopped and we have errors to 3291 * report. Do it. 3292 */ 3293 mcp->mc_speedup_period[ebank] = 0; 3294 rsaddr.mi_valid = 0; 3295 rsaddr.mi_injectrestart = 0; 3296 if (IS_MIRROR(mcp, bank)) { 3297 mc_error_handler_mir(mcp, bank, &rsaddr); 3298 } else { 3299 mc_error_handler(mcp, bank, &rsaddr); 3300 } 3301 3302 restart_patrol(mcp, bank, &rsaddr); 3303 } else { 3304 /* 3305 * We are expecting to report injected 3306 * errors but the HW patrol is still running. 
3307 * Speed up the scanning 3308 */ 3309 mcp->mc_speedup_period[ebank] = 2; 3310 MAC_CMD(mcp, bank, 0); 3311 restart_patrol(mcp, bank, NULL); 3312 } 3313 } 3314 3315 mutex_exit(&mcp->mc_lock); 3316 return (0); 3317 } 3318 3319 void 3320 mc_stphysio(uint64_t pa, uint32_t data) 3321 { 3322 MC_LOG("0x%x -> pa(%lx)\n", data, pa); 3323 stphysio(pa, data); 3324 3325 /* force the above write to be processed by mac patrol */ 3326 data = ldphysio(pa); 3327 MC_LOG("pa(%lx) = 0x%x\n", pa, data); 3328 } 3329 3330 uint32_t 3331 mc_ldphysio(uint64_t pa) 3332 { 3333 uint32_t rv; 3334 3335 rv = ldphysio(pa); 3336 MC_LOG("pa(%lx) = 0x%x\n", pa, rv); 3337 return (rv); 3338 } 3339 3340 #define isdigit(ch) ((ch) >= '0' && (ch) <= '9') 3341 3342 /* 3343 * parse_unum_memory -- extract the board number and the DIMM name from 3344 * the unum. 3345 * 3346 * Return 0 for success and non-zero for a failure. 3347 */ 3348 int 3349 parse_unum_memory(char *unum, int *board, char *dname) 3350 { 3351 char *c; 3352 char x, y, z; 3353 3354 if ((c = strstr(unum, "CMU")) != NULL) { 3355 /* DC Model */ 3356 c += 3; 3357 *board = (uint8_t)stoi(&c); 3358 if ((c = strstr(c, "MEM")) == NULL) { 3359 return (1); 3360 } 3361 c += 3; 3362 if (strlen(c) < 3) { 3363 return (2); 3364 } 3365 if ((!isdigit(c[0])) || (!(isdigit(c[1]))) || 3366 ((c[2] != 'A') && (c[2] != 'B'))) { 3367 return (3); 3368 } 3369 x = c[0]; 3370 y = c[1]; 3371 z = c[2]; 3372 } else if ((c = strstr(unum, "MBU_")) != NULL) { 3373 /* FF1/FF2 Model */ 3374 c += 4; 3375 if ((c[0] != 'A') && (c[0] != 'B')) { 3376 return (4); 3377 } 3378 if ((c = strstr(c, "MEMB")) == NULL) { 3379 return (5); 3380 } 3381 c += 4; 3382 3383 x = c[0]; 3384 *board = ((uint8_t)stoi(&c)) / 4; 3385 if ((c = strstr(c, "MEM")) == NULL) { 3386 return (6); 3387 } 3388 c += 3; 3389 if (strlen(c) < 2) { 3390 return (7); 3391 } 3392 if ((!isdigit(c[0])) || ((c[1] != 'A') && (c[1] != 'B'))) { 3393 return (8); 3394 } 3395 y = c[0]; 3396 z = c[1]; 3397 } else { 3398 return (9); 3399 } 3400 if (*board < 0) { 3401 return (10); 3402 } 3403 dname[0] = x; 3404 dname[1] = y; 3405 dname[2] = z; 3406 dname[3] = '\0'; 3407 return (0); 3408 } 3409 3410 /* 3411 * mc_get_mem_sid_dimm -- Get the serial-ID for a given board and 3412 * the DIMM name. 3413 */ 3414 int 3415 mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf, 3416 int buflen, int *lenp) 3417 { 3418 int ret = ENODEV; 3419 mc_dimm_info_t *d = NULL; 3420 3421 if ((d = mcp->mc_dimm_list) == NULL) 3422 return (ENOTSUP); 3423 3424 for (; d != NULL; d = d->md_next) { 3425 if (strcmp(d->md_dimmname, dname) == 0) { 3426 break; 3427 } 3428 } 3429 if (d != NULL) { 3430 *lenp = strlen(d->md_serial) + strlen(d->md_partnum); 3431 if (buflen <= *lenp) { 3432 cmn_err(CE_WARN, "mc_get_mem_sid_dimm: " 3433 "buflen is smaller than %d\n", *lenp); 3434 ret = ENOSPC; 3435 } else { 3436 snprintf(buf, buflen, "%s:%s", 3437 d->md_serial, d->md_partnum); 3438 ret = 0; 3439 } 3440 } 3441 MC_LOG("mc_get_mem_sid_dimm: Ret=%d Name=%s Serial-ID=%s\n", 3442 ret, dname, (ret == 0) ? 
buf : ""); 3443 return (ret); 3444 } 3445 3446 int 3447 mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int sb, 3448 int bank, uint32_t mf_type, uint32_t d_slot) 3449 { 3450 int lenp = buflen; 3451 int id; 3452 int ret; 3453 char *dimmnm; 3454 3455 if (mf_type == FLT_TYPE_INTERMITTENT_CE || 3456 mf_type == FLT_TYPE_PERMANENT_CE) { 3457 if (plat_model == MODEL_DC) { 3458 id = BD_BK_SLOT_TO_INDEX(0, bank, d_slot); 3459 dimmnm = mc_dc_dimm_unum_table[id]; 3460 } else { 3461 id = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 3462 dimmnm = mc_ff_dimm_unum_table[id]; 3463 } 3464 if ((ret = mc_get_mem_sid_dimm(mcp, dimmnm, buf, buflen, 3465 &lenp)) != 0) { 3466 return (ret); 3467 } 3468 } else { 3469 return (1); 3470 } 3471 3472 return (0); 3473 } 3474 3475 /* 3476 * mc_get_mem_sid -- get the DIMM serial-ID corresponding to the unum. 3477 */ 3478 int 3479 mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 3480 { 3481 int i; 3482 int ret = ENODEV; 3483 int board; 3484 char dname[MCOPL_MAX_DIMMNAME + 1]; 3485 mc_opl_t *mcp; 3486 3487 MC_LOG("mc_get_mem_sid: unum=%s buflen=%d\n", unum, buflen); 3488 if ((ret = parse_unum_memory(unum, &board, dname)) != 0) { 3489 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3490 unum, ret); 3491 return (EINVAL); 3492 } 3493 3494 if (board < 0) { 3495 MC_LOG("mc_get_mem_sid: Invalid board=%d dimm=%s\n", 3496 board, dname); 3497 return (EINVAL); 3498 } 3499 3500 mutex_enter(&mcmutex); 3501 /* 3502 * return ENOENT if we can not find the matching board. 3503 */ 3504 ret = ENOENT; 3505 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3506 if ((mcp = mc_instances[i]) == NULL) 3507 continue; 3508 mutex_enter(&mcp->mc_lock); 3509 if (mcp->mc_phys_board_num != board) { 3510 mutex_exit(&mcp->mc_lock); 3511 continue; 3512 } 3513 ret = mc_get_mem_sid_dimm(mcp, dname, buf, buflen, lenp); 3514 if (ret == 0) { 3515 mutex_exit(&mcp->mc_lock); 3516 break; 3517 } 3518 mutex_exit(&mcp->mc_lock); 3519 } 3520 mutex_exit(&mcmutex); 3521 return (ret); 3522 } 3523 3524 /* 3525 * mc_get_mem_offset -- get the offset in a DIMM for a given physical address. 3526 */ 3527 int 3528 mc_get_mem_offset(uint64_t paddr, uint64_t *offp) 3529 { 3530 int i; 3531 int ret = ENODEV; 3532 mc_addr_t maddr; 3533 mc_opl_t *mcp; 3534 3535 mutex_enter(&mcmutex); 3536 for (i = 0; ((i < OPL_MAX_BOARDS) && (ret != 0)); i++) { 3537 if ((mcp = mc_instances[i]) == NULL) 3538 continue; 3539 mutex_enter(&mcp->mc_lock); 3540 if (!pa_is_valid(mcp, paddr)) { 3541 mutex_exit(&mcp->mc_lock); 3542 continue; 3543 } 3544 if (pa_to_maddr(mcp, paddr, &maddr) == 0) { 3545 *offp = maddr.ma_dimm_addr; 3546 ret = 0; 3547 } 3548 mutex_exit(&mcp->mc_lock); 3549 } 3550 mutex_exit(&mcmutex); 3551 MC_LOG("mc_get_mem_offset: Ret=%d paddr=0x%lx offset=0x%lx\n", 3552 ret, paddr, *offp); 3553 return (ret); 3554 } 3555 3556 /* 3557 * dname_to_bankslot - Get the bank and slot number from the DIMM name. 
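 * The name is looked up in the per-model DIMM unum table and the
 * matching index is converted with INDEX_TO_BANK()/INDEX_TO_SLOT().
 * Returns 0 on success and 1 if the name is not found.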
3558 */ 3559 int 3560 dname_to_bankslot(char *dname, int *bank, int *slot) 3561 { 3562 int i; 3563 int tsz; 3564 char **tbl; 3565 3566 if (plat_model == MODEL_DC) { /* DC */ 3567 tbl = mc_dc_dimm_unum_table; 3568 tsz = OPL_MAX_DIMMS; 3569 } else { 3570 tbl = mc_ff_dimm_unum_table; 3571 tsz = 2 * OPL_MAX_DIMMS; 3572 } 3573 3574 for (i = 0; i < tsz; i++) { 3575 if (strcmp(dname, tbl[i]) == 0) { 3576 break; 3577 } 3578 } 3579 if (i == tsz) { 3580 return (1); 3581 } 3582 *bank = INDEX_TO_BANK(i); 3583 *slot = INDEX_TO_SLOT(i); 3584 return (0); 3585 } 3586 3587 /* 3588 * mc_get_mem_addr -- get the physical address of a DIMM corresponding 3589 * to the unum and sid. 3590 */ 3591 int 3592 mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr) 3593 { 3594 int board; 3595 int bank; 3596 int slot; 3597 int i; 3598 int ret = ENODEV; 3599 char dname[MCOPL_MAX_DIMMNAME + 1]; 3600 mc_addr_t maddr; 3601 mc_opl_t *mcp; 3602 3603 MC_LOG("mc_get_mem_addr: unum=%s sid=%s offset=0x%lx\n", 3604 unum, sid, offset); 3605 if (parse_unum_memory(unum, &board, dname) != 0) { 3606 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3607 unum, ret); 3608 return (EINVAL); 3609 } 3610 3611 if (board < 0) { 3612 MC_LOG("mc_get_mem_addr: Invalid board=%d dimm=%s\n", 3613 board, dname); 3614 return (EINVAL); 3615 } 3616 3617 mutex_enter(&mcmutex); 3618 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3619 if ((mcp = mc_instances[i]) == NULL) 3620 continue; 3621 mutex_enter(&mcp->mc_lock); 3622 if (mcp->mc_phys_board_num != board) { 3623 mutex_exit(&mcp->mc_lock); 3624 continue; 3625 } 3626 3627 ret = dname_to_bankslot(dname, &bank, &slot); 3628 MC_LOG("mc_get_mem_addr: bank=%d slot=%d\n", bank, slot); 3629 if (ret != 0) { 3630 MC_LOG("mc_get_mem_addr: dname_to_bankslot failed\n"); 3631 ret = ENODEV; 3632 } else { 3633 maddr.ma_bd = mcp->mc_board_num; 3634 maddr.ma_bank = bank; 3635 maddr.ma_dimm_addr = offset; 3636 ret = mcaddr_to_pa(mcp, &maddr, paddr); 3637 if (ret != 0) { 3638 MC_LOG("mc_get_mem_addr: " 3639 "mcaddr_to_pa failed\n"); 3640 ret = ENODEV; 3641 } 3642 mutex_exit(&mcp->mc_lock); 3643 break; 3644 } 3645 mutex_exit(&mcp->mc_lock); 3646 } 3647 mutex_exit(&mcmutex); 3648 MC_LOG("mc_get_mem_addr: Ret=%d, Paddr=0x%lx\n", ret, *paddr); 3649 return (ret); 3650 } 3651 3652 static void 3653 mc_free_dimm_list(mc_dimm_info_t *d) 3654 { 3655 mc_dimm_info_t *next; 3656 3657 while (d != NULL) { 3658 next = d->md_next; 3659 kmem_free(d, sizeof (mc_dimm_info_t)); 3660 d = next; 3661 } 3662 } 3663 3664 /* 3665 * mc_get_dimm_list -- get the list of dimms with serial-id info 3666 * from the SP. 
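 * The receive buffer is sized for the worst case (OPL_MAX_DIMMS
 * entries of maximum name, serial and part-number length), and the
 * reply is validated against those limits before the list is built.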
3667 */ 3668 mc_dimm_info_t * 3669 mc_get_dimm_list(mc_opl_t *mcp) 3670 { 3671 uint32_t bufsz; 3672 uint32_t maxbufsz; 3673 int ret; 3674 int sexp; 3675 board_dimm_info_t *bd_dimmp; 3676 mc_dimm_info_t *dimm_list = NULL; 3677 3678 maxbufsz = bufsz = sizeof (board_dimm_info_t) + 3679 ((MCOPL_MAX_DIMMNAME + MCOPL_MAX_SERIAL + 3680 MCOPL_MAX_PARTNUM) * OPL_MAX_DIMMS); 3681 3682 bd_dimmp = (board_dimm_info_t *)kmem_alloc(bufsz, KM_SLEEP); 3683 ret = scf_get_dimminfo(mcp->mc_board_num, (void *)bd_dimmp, &bufsz); 3684 3685 MC_LOG("mc_get_dimm_list: scf_service_getinfo returned=%d\n", ret); 3686 if (ret == 0) { 3687 sexp = sizeof (board_dimm_info_t) + 3688 ((bd_dimmp->bd_dnamesz + bd_dimmp->bd_serialsz + 3689 bd_dimmp->bd_partnumsz) * bd_dimmp->bd_numdimms); 3690 3691 if ((bd_dimmp->bd_version == OPL_DIMM_INFO_VERSION) && 3692 (bd_dimmp->bd_dnamesz <= MCOPL_MAX_DIMMNAME) && 3693 (bd_dimmp->bd_serialsz <= MCOPL_MAX_SERIAL) && 3694 (bd_dimmp->bd_partnumsz <= MCOPL_MAX_PARTNUM) && 3695 (sexp <= bufsz)) { 3696 3697 #ifdef DEBUG 3698 if (oplmc_debug) 3699 mc_dump_dimm_info(bd_dimmp); 3700 #endif 3701 dimm_list = mc_prepare_dimmlist(bd_dimmp); 3702 3703 } else { 3704 cmn_err(CE_WARN, "DIMM info version mismatch\n"); 3705 } 3706 } 3707 kmem_free(bd_dimmp, maxbufsz); 3708 MC_LOG("mc_get_dimm_list: dimmlist=0x%p\n", dimm_list); 3709 return (dimm_list); 3710 } 3711 3712 /* 3713 * mc_prepare_dimmlist - Prepare the dimm list from the information 3714 * received from the SP. 3715 */ 3716 mc_dimm_info_t * 3717 mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp) 3718 { 3719 char *dimm_name; 3720 char *serial; 3721 char *part; 3722 int dimm; 3723 int dnamesz = bd_dimmp->bd_dnamesz; 3724 int sersz = bd_dimmp->bd_serialsz; 3725 int partsz = bd_dimmp->bd_partnumsz; 3726 mc_dimm_info_t *dimm_list = NULL; 3727 mc_dimm_info_t *d; 3728 3729 dimm_name = (char *)(bd_dimmp + 1); 3730 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3731 3732 d = (mc_dimm_info_t *)kmem_alloc(sizeof (mc_dimm_info_t), 3733 KM_SLEEP); 3734 3735 bcopy(dimm_name, d->md_dimmname, dnamesz); 3736 d->md_dimmname[dnamesz] = 0; 3737 3738 serial = dimm_name + dnamesz; 3739 bcopy(serial, d->md_serial, sersz); 3740 d->md_serial[sersz] = 0; 3741 3742 part = serial + sersz; 3743 bcopy(part, d->md_partnum, partsz); 3744 d->md_partnum[partsz] = 0; 3745 3746 d->md_next = dimm_list; 3747 dimm_list = d; 3748 dimm_name = part + partsz; 3749 } 3750 return (dimm_list); 3751 } 3752 3753 #ifdef DEBUG 3754 void 3755 mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz) 3756 { 3757 char dname[MCOPL_MAX_DIMMNAME + 1]; 3758 char serial[MCOPL_MAX_SERIAL + 1]; 3759 char part[ MCOPL_MAX_PARTNUM + 1]; 3760 char *b; 3761 3762 b = buf; 3763 bcopy(b, dname, dnamesz); 3764 dname[dnamesz] = 0; 3765 3766 b += dnamesz; 3767 bcopy(b, serial, serialsz); 3768 serial[serialsz] = 0; 3769 3770 b += serialsz; 3771 bcopy(b, part, partnumsz); 3772 part[partnumsz] = 0; 3773 3774 printf("DIMM=%s Serial=%s PartNum=%s\n", dname, serial, part); 3775 } 3776 3777 void 3778 mc_dump_dimm_info(board_dimm_info_t *bd_dimmp) 3779 { 3780 int dimm; 3781 int dnamesz = bd_dimmp->bd_dnamesz; 3782 int sersz = bd_dimmp->bd_serialsz; 3783 int partsz = bd_dimmp->bd_partnumsz; 3784 char *buf; 3785 3786 printf("Version=%d Board=%02d DIMMs=%d NameSize=%d " 3787 "SerialSize=%d PartnumSize=%d\n", bd_dimmp->bd_version, 3788 bd_dimmp->bd_boardnum, bd_dimmp->bd_numdimms, bd_dimmp->bd_dnamesz, 3789 bd_dimmp->bd_serialsz, bd_dimmp->bd_partnumsz); 3790 
printf("======================================================\n"); 3791 3792 buf = (char *)(bd_dimmp + 1); 3793 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3794 mc_dump_dimm(buf, dnamesz, sersz, partsz); 3795 buf += dnamesz + sersz + partsz; 3796 } 3797 printf("======================================================\n"); 3798 } 3799 3800 3801 /* ARGSUSED */ 3802 static int 3803 mc_ioctl_debug(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 3804 int *rvalp) 3805 { 3806 caddr_t buf; 3807 uint64_t pa; 3808 int rv = 0; 3809 int i; 3810 uint32_t flags; 3811 static uint32_t offset = 0; 3812 3813 3814 flags = (cmd >> 4) & 0xfffffff; 3815 3816 cmd &= 0xf; 3817 3818 MC_LOG("mc_ioctl(cmd = %x, flags = %x)\n", cmd, flags); 3819 3820 if (arg != NULL) { 3821 if (ddi_copyin((const void *)arg, (void *)&pa, 3822 sizeof (uint64_t), 0) < 0) { 3823 rv = EFAULT; 3824 return (rv); 3825 } 3826 buf = NULL; 3827 } else { 3828 buf = (caddr_t)kmem_alloc(PAGESIZE, KM_SLEEP); 3829 3830 pa = va_to_pa(buf); 3831 pa += offset; 3832 3833 offset += 64; 3834 if (offset >= PAGESIZE) 3835 offset = 0; 3836 } 3837 3838 switch (cmd) { 3839 case MCI_CE: 3840 mc_inject_error(MC_INJECT_INTERMITTENT_CE, pa, flags); 3841 break; 3842 case MCI_PERM_CE: 3843 mc_inject_error(MC_INJECT_PERMANENT_CE, pa, flags); 3844 break; 3845 case MCI_UE: 3846 mc_inject_error(MC_INJECT_UE, pa, flags); 3847 break; 3848 case MCI_M_CE: 3849 mc_inject_error(MC_INJECT_INTERMITTENT_MCE, pa, flags); 3850 break; 3851 case MCI_M_PCE: 3852 mc_inject_error(MC_INJECT_PERMANENT_MCE, pa, flags); 3853 break; 3854 case MCI_M_UE: 3855 mc_inject_error(MC_INJECT_MUE, pa, flags); 3856 break; 3857 case MCI_CMP: 3858 mc_inject_error(MC_INJECT_CMPE, pa, flags); 3859 break; 3860 case MCI_NOP: 3861 mc_inject_error(MC_INJECT_NOP, pa, flags); break; 3862 case MCI_SHOW_ALL: 3863 mc_debug_show_all = 1; 3864 break; 3865 case MCI_SHOW_NONE: 3866 mc_debug_show_all = 0; 3867 break; 3868 case MCI_ALLOC: 3869 /* 3870 * just allocate some kernel memory and never free it 3871 * 512 MB seems to be the maximum size supported. 3872 */ 3873 cmn_err(CE_NOTE, "Allocating kmem %d MB\n", flags * 512); 3874 for (i = 0; i < flags; i++) { 3875 buf = kmem_alloc(512 * 1024 * 1024, KM_SLEEP); 3876 cmn_err(CE_NOTE, "kmem buf %llx PA %llx\n", 3877 (u_longlong_t)buf, (u_longlong_t)va_to_pa(buf)); 3878 } 3879 break; 3880 case MCI_SUSPEND: 3881 (void) opl_mc_suspend(); 3882 break; 3883 case MCI_RESUME: 3884 (void) opl_mc_resume(); 3885 break; 3886 default: 3887 rv = ENXIO; 3888 } 3889 return (rv); 3890 } 3891 3892 #endif /* DEBUG */ 3893
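
/*
 * Illustrative sketch (not part of the driver): how a user-level test
 * program might be expected to exercise the DEBUG-only injection ioctl
 * above.  mc_ioctl_debug() decodes its command word as
 *
 *	flags = (cmd >> 4) & 0xfffffff;
 *	cmd &= 0xf;
 *
 * so the MCI_* command goes in the low 4 bits, the MC_INJECT_FLAG_*
 * bits are shifted left by 4, and the target physical address is
 * passed by reference.  The device path, the chosen flag and the PA
 * below are assumptions for illustration only.
 *
 *	#include <fcntl.h>
 *	#include <sys/mc-opl.h>
 *
 *	uint64_t pa = 0x7200000000ULL;		(hypothetical PA)
 *	int fd = open("/devices/pseudo/mc-opl@80:mc-opl", O_RDWR);
 *	(void) ioctl(fd, MCI_UE | (MC_INJECT_FLAG_RESTART << 4), &pa);
 *	(void) close(fd);
 */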