1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2006 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/conf.h> 34 #include <sys/modctl.h> 35 #include <sys/stat.h> 36 #include <sys/async.h> 37 #include <sys/machcpuvar.h> 38 #include <sys/machsystm.h> 39 #include <sys/promif.h> 40 #include <sys/ksynch.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/ddifm.h> 44 #include <sys/fm/protocol.h> 45 #include <sys/fm/util.h> 46 #include <sys/kmem.h> 47 #include <sys/fm/io/opl_mc_fm.h> 48 #include <sys/memlist.h> 49 #include <sys/param.h> 50 #include <sys/disp.h> 51 #include <vm/page.h> 52 #include <sys/mc-opl.h> 53 #include <sys/opl.h> 54 #include <sys/opl_dimm.h> 55 #include <sys/scfd/scfostoescf.h> 56 #include <sys/cpu_module.h> 57 #include <vm/seg_kmem.h> 58 #include <sys/vmem.h> 59 #include <vm/hat_sfmmu.h> 60 #include <sys/vmsystm.h> 61 #include <sys/membar.h> 62 63 /* 64 * Function prototypes 65 */ 66 static int 
mc_open(dev_t *, int, int, cred_t *); 67 static int mc_close(dev_t, int, int, cred_t *); 68 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 69 static int mc_attach(dev_info_t *, ddi_attach_cmd_t); 70 static int mc_detach(dev_info_t *, ddi_detach_cmd_t); 71 72 static int mc_poll_init(void); 73 static void mc_poll_fini(void); 74 static int mc_board_add(mc_opl_t *mcp); 75 static int mc_board_del(mc_opl_t *mcp); 76 static int mc_suspend(mc_opl_t *mcp, uint32_t flag); 77 static int mc_resume(mc_opl_t *mcp, uint32_t flag); 78 int opl_mc_suspend(void); 79 int opl_mc_resume(void); 80 81 static void insert_mcp(mc_opl_t *mcp); 82 static void delete_mcp(mc_opl_t *mcp); 83 84 static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr); 85 86 static int mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa); 87 88 int mc_get_mem_unum(int, uint64_t, char *, int, int *); 89 int mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr); 90 int mc_get_mem_offset(uint64_t paddr, uint64_t *offp); 91 int mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp); 92 int mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf, 93 int buflen, int *lenp); 94 mc_dimm_info_t *mc_get_dimm_list(mc_opl_t *mcp); 95 mc_dimm_info_t *mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp); 96 int mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, int bank, 97 uint32_t mf_type, uint32_t d_slot); 98 static void mc_free_dimm_list(mc_dimm_info_t *d); 99 static void mc_get_mlist(mc_opl_t *); 100 static void mc_polling(void); 101 static int mc_opl_get_physical_board(int); 102 103 #ifdef DEBUG 104 static int mc_ioctl_debug(dev_t, int, intptr_t, int, cred_t *, int *); 105 void mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz); 106 void mc_dump_dimm_info(board_dimm_info_t *bd_dimmp); 107 #endif 108 109 #pragma weak opl_get_physical_board 110 extern int opl_get_physical_board(int); 111 extern int plat_max_boards(void); 112 113 /* 114 * Configuration 
data structures
 */

/*
 * Character/block entry point table.  Only open, close and ioctl are
 * provided by this driver (mc_open, mc_close, mc_ioctl); every other
 * slot is filled with nulldev, nodev, nochpoll or ddi_prop_op.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};

/*
 * Device operations table.  attach/detach are handled by mc_attach and
 * mc_detach; mc_cb_ops above supplies the cb_ops entry.
 */
static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt  */
	ddi_getinfo_1to1,		/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev				/* power */
};

/*
 * Driver globals
 */

/*
 * Platform model.  The value is also used as the index into
 * model_names[] below, so the enumerator values and the order of the
 * model_names[] entries must stay in step.
 */
static enum {
	MODEL_FF1 = 0,
	MODEL_FF2 = 1,
	MODEL_DC = 2
} plat_model = MODEL_DC;	/* The default behaviour is DC */

/*
 * Per-model name fragments used when building a unum string:
 * unit_name is the board/unit prefix and mem_name is the memory-board
 * prefix (empty for DC).  Indexed by plat_model.
 */
static struct plat_model_names {
	const char *unit_name;
	const char *mem_name;
} model_names[] = {
	{ "MBU_A", "MEMB" },
	{ "MBU_B", "MEMB" },
	{ "CMU", "" }
};

/*
 * The DIMM Names for DC platform.
 * The index into this table is made up of (bank, dslot),
 * Where dslot occupies bits 0-1 and bank occupies 2-4.
174 */ 175 static char *mc_dc_dimm_unum_table[OPL_MAX_DIMMS] = { 176 /* --------CMUnn----------- */ 177 /* --CS0-----|--CS1------ */ 178 /* -H-|--L-- | -H- | -L-- */ 179 "03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */ 180 "13A", "12A", "13B", "12B", /* Bank 1 (MAC 0 bank 1) */ 181 "23A", "22A", "23B", "22B", /* Bank 2 (MAC 1 bank 0) */ 182 "33A", "32A", "33B", "32B", /* Bank 3 (MAC 1 bank 1) */ 183 "01A", "00A", "01B", "00B", /* Bank 4 (MAC 2 bank 0) */ 184 "11A", "10A", "11B", "10B", /* Bank 5 (MAC 2 bank 1) */ 185 "21A", "20A", "21B", "20B", /* Bank 6 (MAC 3 bank 0) */ 186 "31A", "30A", "31B", "30B" /* Bank 7 (MAC 3 bank 1) */ 187 }; 188 189 /* 190 * The DIMM Names for FF1/FF2 platforms. 191 * The index into this table is made up of (board, bank, dslot), 192 * Where dslot occupies bits 0-1, bank occupies 2-4 and 193 * board occupies the bit 5. 194 */ 195 static char *mc_ff_dimm_unum_table[2 * OPL_MAX_DIMMS] = { 196 /* --------CMU0---------- */ 197 /* --CS0-----|--CS1------ */ 198 /* -H-|--L-- | -H- | -L-- */ 199 "03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */ 200 "01A", "00A", "01B", "00B", /* Bank 1 (MAC 0 bank 1) */ 201 "13A", "12A", "13B", "12B", /* Bank 2 (MAC 1 bank 0) */ 202 "11A", "10A", "11B", "10B", /* Bank 3 (MAC 1 bank 1) */ 203 "23A", "22A", "23B", "22B", /* Bank 4 (MAC 2 bank 0) */ 204 "21A", "20A", "21B", "20B", /* Bank 5 (MAC 2 bank 1) */ 205 "33A", "32A", "33B", "32B", /* Bank 6 (MAC 3 bank 0) */ 206 "31A", "30A", "31B", "30B", /* Bank 7 (MAC 3 bank 1) */ 207 /* --------CMU1---------- */ 208 /* --CS0-----|--CS1------ */ 209 /* -H-|--L-- | -H- | -L-- */ 210 "43A", "42A", "43B", "42B", /* Bank 0 (MAC 0 bank 0) */ 211 "41A", "40A", "41B", "40B", /* Bank 1 (MAC 0 bank 1) */ 212 "53A", "52A", "53B", "52B", /* Bank 2 (MAC 1 bank 0) */ 213 "51A", "50A", "51B", "50B", /* Bank 3 (MAC 1 bank 1) */ 214 "63A", "62A", "63B", "62B", /* Bank 4 (MAC 2 bank 0) */ 215 "61A", "60A", "61B", "60B", /* Bank 5 (MAC 2 bank 1) */ 216 "73A", "72A", "73B", 
"72B", /* Bank 6 (MAC 3 bank 0) */ 217 "71A", "70A", "71B", "70B" /* Bank 7 (MAC 3 bank 1) */ 218 }; 219 220 #define BD_BK_SLOT_TO_INDEX(bd, bk, s) \ 221 (((bd & 0x01) << 5) | ((bk & 0x07) << 2) | (s & 0x03)) 222 223 #define INDEX_TO_BANK(i) (((i) & 0x1C) >> 2) 224 #define INDEX_TO_SLOT(i) ((i) & 0x03) 225 226 /* Isolation unit size is 64 MB */ 227 #define MC_ISOLATION_BSIZE (64 * 1024 * 1024) 228 229 #define MC_MAX_SPEEDS 7 230 231 typedef struct { 232 uint32_t mc_speeds; 233 uint32_t mc_period; 234 } mc_scan_speed_t; 235 236 #define MC_CNTL_SPEED_SHIFT 26 237 238 /* 239 * In mirror mode, we normalized the bank idx to "even" since 240 * the HW treats them as one unit w.r.t programming. 241 * This bank index will be the "effective" bank index. 242 * All mirrored bank state info on mc_period, mc_speedup_period 243 * will be stored in the even bank structure to avoid code duplication. 244 */ 245 #define MIRROR_IDX(bankidx) (bankidx & ~1) 246 247 static mc_scan_speed_t mc_scan_speeds[MC_MAX_SPEEDS] = { 248 {0x6 << MC_CNTL_SPEED_SHIFT, 0}, 249 {0x5 << MC_CNTL_SPEED_SHIFT, 32}, 250 {0x4 << MC_CNTL_SPEED_SHIFT, 64}, 251 {0x3 << MC_CNTL_SPEED_SHIFT, 128}, 252 {0x2 << MC_CNTL_SPEED_SHIFT, 256}, 253 {0x1 << MC_CNTL_SPEED_SHIFT, 512}, 254 {0x0 << MC_CNTL_SPEED_SHIFT, 1024} 255 }; 256 257 static uint32_t mc_max_speed = (0x6 << 26); 258 259 int mc_isolation_bsize = MC_ISOLATION_BSIZE; 260 int mc_patrol_interval_sec = MC_PATROL_INTERVAL_SEC; 261 int mc_max_scf_retry = 16; 262 int mc_max_scf_logs = 64; 263 int mc_max_errlog_processed = BANKNUM_PER_SB*2; 264 int mc_scan_period = 12 * 60 * 60; /* 12 hours period */ 265 int mc_max_rewrite_loop = 100; 266 int mc_rewrite_delay = 10; 267 /* 268 * it takes SCF about 300 m.s. to process a requst. We can bail out 269 * if it is busy. It does not pay to wait for it too long. 
 */
int mc_max_scf_loop = 2;
int mc_scf_delay = 100;
int mc_pce_dropped = 0;
int mc_poll_priority = MINCLSYSPRI;	/* priority passed to thread_create */


/*
 * Mutex hierarchy in mc-opl
 * If both mcmutex and mc_lock must be held,
 * mcmutex must be acquired first, and then mc_lock.
 */

static kmutex_t mcmutex;
mc_opl_t *mc_instances[OPL_MAX_BOARDS];

/*
 * Polling thread state.  mc_polling_lock protects mc_poll_cmd and
 * mc_pollthr_running; mc_polling_cv wakes the polling thread and
 * mc_poll_exit_cv is signalled by the thread when it exits.
 */
static kmutex_t mc_polling_lock;
static kcondvar_t mc_polling_cv;
static kcondvar_t mc_poll_exit_cv;
static int mc_poll_cmd = 0;
static int mc_pollthr_running = 0;
/*
 * Polling interval.  mc_attach() sets this from drv_usectohz() and the
 * polling thread adds it to ddi_get_lbolt(), i.e. it is in clock ticks.
 */
int mc_timeout_period = 0;
void *mc_statep;	/* soft state anchor for ddi_soft_state_*() */

#ifdef	DEBUG
int oplmc_debug = 0;
#endif

static int mc_debug_show_all = 0;

extern struct mod_ops mod_driverops;

static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"OPL Memory-controller %I%",	/* module name */
	&mc_ops,			/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};

/*
 * Memory lookup hooks.  They are declared weak, so code that touches
 * them first tests the symbol's address before assigning through it.
 */
#pragma weak opl_get_mem_unum
#pragma weak opl_get_mem_sid
#pragma weak opl_get_mem_offset
#pragma weak opl_get_mem_addr

extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
extern int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
extern int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
extern int (*opl_get_mem_addr)(char *unum, char *sid, uint64_t offset,
    uint64_t *paddr);


/*
 * pseudo-mc node portid format
 *
 *		[10]   = 0
 *		[9]    = 1
 *		[8]    = LSB_ID[4] = 0
 *		[7:4]  = LSB_ID[3:0]
 *		[3:0]  = 0
 *
 */

/*
 * These are the module initialization routines.
339 */ 340 int 341 _init(void) 342 { 343 int error; 344 int plen; 345 char model[20]; 346 pnode_t node; 347 348 349 if ((error = ddi_soft_state_init(&mc_statep, 350 sizeof (mc_opl_t), 1)) != 0) 351 return (error); 352 353 if ((error = mc_poll_init()) != 0) { 354 ddi_soft_state_fini(&mc_statep); 355 return (error); 356 } 357 358 mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL); 359 if (&opl_get_mem_unum) 360 opl_get_mem_unum = mc_get_mem_unum; 361 if (&opl_get_mem_sid) 362 opl_get_mem_sid = mc_get_mem_sid; 363 if (&opl_get_mem_offset) 364 opl_get_mem_offset = mc_get_mem_offset; 365 if (&opl_get_mem_addr) 366 opl_get_mem_addr = mc_get_mem_addr; 367 368 node = prom_rootnode(); 369 plen = prom_getproplen(node, "model"); 370 371 if (plen > 0 && plen < sizeof (model)) { 372 (void) prom_getprop(node, "model", model); 373 model[plen] = '\0'; 374 if (strcmp(model, "FF1") == 0) 375 plat_model = MODEL_FF1; 376 else if (strcmp(model, "FF2") == 0) 377 plat_model = MODEL_FF2; 378 else if (strncmp(model, "DC", 2) == 0) 379 plat_model = MODEL_DC; 380 } 381 382 error = mod_install(&modlinkage); 383 if (error != 0) { 384 if (&opl_get_mem_unum) 385 opl_get_mem_unum = NULL; 386 if (&opl_get_mem_sid) 387 opl_get_mem_sid = NULL; 388 if (&opl_get_mem_offset) 389 opl_get_mem_offset = NULL; 390 if (&opl_get_mem_addr) 391 opl_get_mem_addr = NULL; 392 mutex_destroy(&mcmutex); 393 mc_poll_fini(); 394 ddi_soft_state_fini(&mc_statep); 395 } 396 return (error); 397 } 398 399 int 400 _fini(void) 401 { 402 int error; 403 404 if ((error = mod_remove(&modlinkage)) != 0) 405 return (error); 406 407 if (&opl_get_mem_unum) 408 opl_get_mem_unum = NULL; 409 if (&opl_get_mem_sid) 410 opl_get_mem_sid = NULL; 411 if (&opl_get_mem_offset) 412 opl_get_mem_offset = NULL; 413 if (&opl_get_mem_addr) 414 opl_get_mem_addr = NULL; 415 416 mutex_destroy(&mcmutex); 417 mc_poll_fini(); 418 ddi_soft_state_fini(&mc_statep); 419 420 return (0); 421 } 422 423 int 424 _info(struct modinfo *modinfop) 425 { 426 return 
(mod_info(&modlinkage, modinfop)); 427 } 428 429 static void 430 mc_polling_thread() 431 { 432 mutex_enter(&mc_polling_lock); 433 mc_pollthr_running = 1; 434 while (!(mc_poll_cmd & MC_POLL_EXIT)) { 435 mc_polling(); 436 cv_timedwait(&mc_polling_cv, &mc_polling_lock, 437 ddi_get_lbolt() + mc_timeout_period); 438 } 439 mc_pollthr_running = 0; 440 441 /* 442 * signal if any one is waiting for this thread to exit. 443 */ 444 cv_signal(&mc_poll_exit_cv); 445 mutex_exit(&mc_polling_lock); 446 thread_exit(); 447 /* NOTREACHED */ 448 } 449 450 static int 451 mc_poll_init() 452 { 453 mutex_init(&mc_polling_lock, NULL, MUTEX_DRIVER, NULL); 454 cv_init(&mc_polling_cv, NULL, CV_DRIVER, NULL); 455 cv_init(&mc_poll_exit_cv, NULL, CV_DRIVER, NULL); 456 return (0); 457 } 458 459 static void 460 mc_poll_fini() 461 { 462 mutex_enter(&mc_polling_lock); 463 if (mc_pollthr_running) { 464 mc_poll_cmd = MC_POLL_EXIT; 465 cv_signal(&mc_polling_cv); 466 while (mc_pollthr_running) { 467 cv_wait(&mc_poll_exit_cv, &mc_polling_lock); 468 } 469 } 470 mutex_exit(&mc_polling_lock); 471 mutex_destroy(&mc_polling_lock); 472 cv_destroy(&mc_polling_cv); 473 cv_destroy(&mc_poll_exit_cv); 474 } 475 476 static int 477 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 478 { 479 mc_opl_t *mcp; 480 int instance; 481 int rv; 482 483 /* get the instance of this devi */ 484 instance = ddi_get_instance(devi); 485 486 switch (cmd) { 487 case DDI_ATTACH: 488 break; 489 case DDI_RESUME: 490 mcp = ddi_get_soft_state(mc_statep, instance); 491 rv = mc_resume(mcp, MC_DRIVER_SUSPENDED); 492 return (rv); 493 default: 494 return (DDI_FAILURE); 495 } 496 497 if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS) 498 return (DDI_FAILURE); 499 500 if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) { 501 goto bad; 502 } 503 504 if (mc_timeout_period == 0) { 505 mc_patrol_interval_sec = (int)ddi_getprop(DDI_DEV_T_ANY, devi, 506 DDI_PROP_DONTPASS, "mc-timeout-interval-sec", 507 mc_patrol_interval_sec); 508 
mc_timeout_period = drv_usectohz( 509 1000000 * mc_patrol_interval_sec / OPL_MAX_BOARDS); 510 } 511 512 /* set informations in mc state */ 513 mcp->mc_dip = devi; 514 515 if (mc_board_add(mcp)) 516 goto bad; 517 518 insert_mcp(mcp); 519 520 /* 521 * Start the polling thread if it is not running already. 522 */ 523 mutex_enter(&mc_polling_lock); 524 if (!mc_pollthr_running) { 525 (void) thread_create(NULL, 0, (void (*)())mc_polling_thread, 526 NULL, 0, &p0, TS_RUN, mc_poll_priority); 527 } 528 mutex_exit(&mc_polling_lock); 529 ddi_report_dev(devi); 530 531 return (DDI_SUCCESS); 532 533 bad: 534 ddi_soft_state_free(mc_statep, instance); 535 return (DDI_FAILURE); 536 } 537 538 /* ARGSUSED */ 539 static int 540 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 541 { 542 int rv; 543 int instance; 544 mc_opl_t *mcp; 545 546 /* get the instance of this devi */ 547 instance = ddi_get_instance(devi); 548 if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) { 549 return (DDI_FAILURE); 550 } 551 552 switch (cmd) { 553 case DDI_SUSPEND: 554 rv = mc_suspend(mcp, MC_DRIVER_SUSPENDED); 555 return (rv); 556 case DDI_DETACH: 557 break; 558 default: 559 return (DDI_FAILURE); 560 } 561 562 delete_mcp(mcp); 563 if (mc_board_del(mcp) != DDI_SUCCESS) { 564 return (DDI_FAILURE); 565 } 566 567 /* free up the soft state */ 568 ddi_soft_state_free(mc_statep, instance); 569 570 return (DDI_SUCCESS); 571 } 572 573 /* ARGSUSED */ 574 static int 575 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp) 576 { 577 return (0); 578 } 579 580 /* ARGSUSED */ 581 static int 582 mc_close(dev_t devp, int flag, int otyp, cred_t *credp) 583 { 584 return (0); 585 } 586 587 /* ARGSUSED */ 588 static int 589 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 590 int *rvalp) 591 { 592 #ifdef DEBUG 593 return (mc_ioctl_debug(dev, cmd, arg, mode, credp, rvalp)); 594 #else 595 return (ENXIO); 596 #endif 597 } 598 599 /* 600 * PA validity check: 601 * This function return 1 if the PA is 
a valid PA 602 * in the running Solaris instance i.e. in physinstall 603 * Otherwise, return 0. 604 */ 605 606 /* ARGSUSED */ 607 static int 608 pa_is_valid(mc_opl_t *mcp, uint64_t addr) 609 { 610 if (mcp->mlist == NULL) 611 mc_get_mlist(mcp); 612 613 if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) { 614 return (1); 615 } 616 return (0); 617 } 618 619 /* 620 * mac-pa translation routines. 621 * 622 * Input: mc driver state, (LSB#, Bank#, DIMM address) 623 * Output: physical address 624 * 625 * Valid - return value: 0 626 * Invalid - return value: -1 627 */ 628 static int 629 mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa) 630 { 631 int i; 632 uint64_t pa_offset = 0; 633 int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1; 634 int bank = maddr->ma_bank; 635 mc_addr_t maddr1; 636 int bank0, bank1; 637 638 MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank, 639 maddr->ma_dimm_addr); 640 641 /* loc validity check */ 642 ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd); 643 ASSERT(bank >= 0 && OPL_BANK_MAX > bank); 644 645 /* Do translation */ 646 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 647 int pa_bit = 0; 648 int mc_bit = mcp->mc_trans_table[cs][i]; 649 if (mc_bit < MC_ADDRESS_BITS) { 650 pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1; 651 } else if (mc_bit == MP_NONE) { 652 pa_bit = 0; 653 } else if (mc_bit == MP_BANK_0) { 654 pa_bit = bank & 1; 655 } else if (mc_bit == MP_BANK_1) { 656 pa_bit = (bank >> 1) & 1; 657 } else if (mc_bit == MP_BANK_2) { 658 pa_bit = (bank >> 2) & 1; 659 } 660 pa_offset |= ((uint64_t)pa_bit) << i; 661 } 662 *pa = mcp->mc_start_address + pa_offset; 663 MC_LOG("pa = %lx\n", *pa); 664 665 if (pa_to_maddr(mcp, *pa, &maddr1) == -1) { 666 cmn_err(CE_WARN, "mcaddr_to_pa: /LSB%d/B%d/%x failed to " 667 "convert PA %lx\n", maddr->ma_bd, bank, 668 maddr->ma_dimm_addr, *pa); 669 return (-1); 670 } 671 672 /* 673 * In mirror mode, PA is always translated to the even bank. 
674 */ 675 if (IS_MIRROR(mcp, maddr->ma_bank)) { 676 bank0 = maddr->ma_bank & ~(1); 677 bank1 = maddr1.ma_bank & ~(1); 678 } else { 679 bank0 = maddr->ma_bank; 680 bank1 = maddr1.ma_bank; 681 } 682 /* 683 * there is no need to check ma_bd because it is generated from 684 * mcp. They are the same. 685 */ 686 if ((bank0 == bank1) && 687 (maddr->ma_dimm_addr == maddr1.ma_dimm_addr)) { 688 return (0); 689 } else { 690 cmn_err(CE_WARN, "Translation error source /LSB%d/B%d/%x, " 691 "PA %lx, target /LSB%d/B%d/%x\n", 692 maddr->ma_bd, bank, maddr->ma_dimm_addr, 693 *pa, maddr1.ma_bd, maddr1.ma_bank, 694 maddr1.ma_dimm_addr); 695 return (-1); 696 } 697 } 698 699 /* 700 * PA to CS (used by pa_to_maddr). 701 */ 702 static int 703 pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset) 704 { 705 int i; 706 int cs = 1; 707 708 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 709 /* MAC address bit<29> is arranged on the same PA bit */ 710 /* on both table. So we may use any table. */ 711 if (mcp->mc_trans_table[0][i] == CS_SHIFT) { 712 cs = (pa_offset >> i) & 1; 713 break; 714 } 715 } 716 return (cs); 717 } 718 719 /* 720 * PA to DIMM (used by pa_to_maddr). 721 */ 722 /* ARGSUSED */ 723 static uint32_t 724 pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset) 725 { 726 int i; 727 int cs = pa_to_cs(mcp, pa_offset); 728 uint32_t dimm_addr = 0; 729 730 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 731 int pa_bit_value = (pa_offset >> i) & 1; 732 int mc_bit = mcp->mc_trans_table[cs][i]; 733 if (mc_bit < MC_ADDRESS_BITS) { 734 dimm_addr |= pa_bit_value << mc_bit; 735 } 736 } 737 dimm_addr |= cs << CS_SHIFT; 738 return (dimm_addr); 739 } 740 741 /* 742 * PA to Bank (used by pa_to_maddr). 
743 */ 744 static int 745 pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset) 746 { 747 int i; 748 int cs = pa_to_cs(mcp, pa_offset); 749 int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT]; 750 751 752 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 753 int pa_bit_value = (pa_offset >> i) & 1; 754 int mc_bit = mcp->mc_trans_table[cs][i]; 755 switch (mc_bit) { 756 case MP_BANK_0: 757 bankno |= pa_bit_value; 758 break; 759 case MP_BANK_1: 760 bankno |= pa_bit_value << 1; 761 break; 762 case MP_BANK_2: 763 bankno |= pa_bit_value << 2; 764 break; 765 } 766 } 767 768 return (bankno); 769 } 770 771 /* 772 * PA to MAC address translation 773 * 774 * Input: MAC driver state, physicall adress 775 * Output: LSB#, Bank id, mac address 776 * 777 * Valid - return value: 0 778 * Invalid - return value: -1 779 */ 780 781 int 782 pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr) 783 { 784 uint64_t pa_offset; 785 786 if (!mc_rangecheck_pa(mcp, pa)) 787 return (-1); 788 789 /* Do translation */ 790 pa_offset = pa - mcp->mc_start_address; 791 792 maddr->ma_bd = mcp->mc_board_num; 793 maddr->ma_bank = pa_to_bank(mcp, pa_offset); 794 maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset); 795 MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n", 796 pa_offset, maddr->ma_bd, maddr->ma_bank, maddr->ma_dimm_addr); 797 return (0); 798 } 799 800 /* 801 * UNUM format for DC is "/CMUnn/MEMxyZ", where 802 * nn = 00..03 for DC1 and 00..07 for DC2 and 00..15 for DC3. 803 * x = MAC 0..3 804 * y = 0..3 (slot info). 805 * Z = 'A' or 'B' 806 * 807 * UNUM format for FF1 is "/MBU_A/MEMBx/MEMyZ", where 808 * x = 0..3 (MEMB number) 809 * y = 0..3 (slot info). 810 * Z = 'A' or 'B' 811 * 812 * UNUM format for FF2 is "/MBU_B/MEMBx/MEMyZ" 813 * x = 0..7 (MEMB number) 814 * y = 0..3 (slot info). 
815 * Z = 'A' or 'B' 816 */ 817 int 818 mc_set_mem_unum(char *buf, int buflen, int lsb, int bank, 819 uint32_t mf_type, uint32_t d_slot) 820 { 821 char *dimmnm; 822 char memb_num; 823 int sb; 824 int i; 825 826 if ((sb = mc_opl_get_physical_board(lsb)) < 0) 827 return (ENODEV); 828 829 if (plat_model == MODEL_DC) { 830 if (mf_type == FLT_TYPE_PERMANENT_CE) { 831 i = BD_BK_SLOT_TO_INDEX(0, bank, d_slot); 832 dimmnm = mc_dc_dimm_unum_table[i]; 833 snprintf(buf, buflen, "/%s%02d/MEM%s", 834 model_names[plat_model].unit_name, sb, dimmnm); 835 } else { 836 i = BD_BK_SLOT_TO_INDEX(0, bank, 0); 837 snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s MEM%s MEM%s", 838 model_names[plat_model].unit_name, sb, 839 mc_dc_dimm_unum_table[i], 840 mc_dc_dimm_unum_table[i + 1], 841 mc_dc_dimm_unum_table[i + 2], 842 mc_dc_dimm_unum_table[i + 3]); 843 } 844 } else { 845 i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 846 if (mf_type == FLT_TYPE_PERMANENT_CE) { 847 dimmnm = mc_ff_dimm_unum_table[i]; 848 memb_num = dimmnm[0]; 849 snprintf(buf, buflen, "/%s/%s%c/MEM%s", 850 model_names[plat_model].unit_name, 851 model_names[plat_model].mem_name, 852 memb_num, &dimmnm[1]); 853 } else { 854 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 855 memb_num = mc_ff_dimm_unum_table[i][0], 856 snprintf(buf, buflen, 857 "/%s/%s%c/MEM%s MEM%s MEM%s MEM%s", 858 model_names[plat_model].unit_name, 859 model_names[plat_model].mem_name, memb_num, 860 &mc_ff_dimm_unum_table[i][1], 861 &mc_ff_dimm_unum_table[i + 1][1], 862 &mc_ff_dimm_unum_table[i + 2][1], 863 &mc_ff_dimm_unum_table[i + 3][1]); 864 } 865 } 866 return (0); 867 } 868 869 static void 870 mc_ereport_post(mc_aflt_t *mc_aflt) 871 { 872 char buf[FM_MAX_CLASS]; 873 char device_path[MAXPATHLEN]; 874 char sid[MAXPATHLEN]; 875 nv_alloc_t *nva = NULL; 876 nvlist_t *ereport, *detector, *resource; 877 errorq_elem_t *eqep; 878 int nflts; 879 mc_flt_stat_t *flt_stat; 880 int i, n; 881 int blen = MAXPATHLEN; 882 char *p, *s = NULL; 883 uint32_t values[2], synd[2], dslot[2]; 884 
uint64_t offset = (uint64_t)-1; 885 int ret = -1; 886 887 if (panicstr) { 888 eqep = errorq_reserve(ereport_errorq); 889 if (eqep == NULL) 890 return; 891 ereport = errorq_elem_nvl(ereport_errorq, eqep); 892 nva = errorq_elem_nva(ereport_errorq, eqep); 893 } else { 894 ereport = fm_nvlist_create(nva); 895 } 896 897 /* 898 * Create the scheme "dev" FMRI. 899 */ 900 detector = fm_nvlist_create(nva); 901 resource = fm_nvlist_create(nva); 902 903 nflts = mc_aflt->mflt_nflts; 904 905 ASSERT(nflts >= 1 && nflts <= 2); 906 907 flt_stat = mc_aflt->mflt_stat[0]; 908 (void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path); 909 (void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL, 910 device_path, NULL); 911 912 /* 913 * Encode all the common data into the ereport. 914 */ 915 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s", 916 MC_OPL_ERROR_CLASS, 917 mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS : 918 MC_OPL_MI_SUBCLASS, 919 mc_aflt->mflt_erpt_class); 920 921 MC_LOG("mc_ereport_post: ereport %s\n", buf); 922 923 924 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 925 fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1), 926 detector, NULL); 927 928 /* 929 * Set payload. 
930 */ 931 fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32, 932 flt_stat->mf_flt_maddr.ma_bd, NULL); 933 934 fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64, 935 flt_stat->mf_flt_paddr, NULL); 936 937 if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 938 fm_payload_set(ereport, MC_OPL_FLT_TYPE, 939 DATA_TYPE_UINT8, ECC_STICKY, NULL); 940 } 941 942 for (i = 0; i < nflts; i++) 943 values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank; 944 945 fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY, 946 nflts, values, NULL); 947 948 for (i = 0; i < nflts; i++) 949 values[i] = mc_aflt->mflt_stat[i]->mf_cntl; 950 951 fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY, 952 nflts, values, NULL); 953 954 for (i = 0; i < nflts; i++) 955 values[i] = mc_aflt->mflt_stat[i]->mf_err_add; 956 957 /* offset is set only for PCE */ 958 if (mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_PERMANENT_CE) { 959 offset = values[0]; 960 961 } 962 fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY, 963 nflts, values, NULL); 964 965 for (i = 0; i < nflts; i++) 966 values[i] = mc_aflt->mflt_stat[i]->mf_err_log; 967 968 fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY, 969 nflts, values, NULL); 970 971 for (i = 0; i < nflts; i++) { 972 flt_stat = mc_aflt->mflt_stat[i]; 973 if (flt_stat->mf_errlog_valid) { 974 synd[i] = flt_stat->mf_synd; 975 dslot[i] = flt_stat->mf_dimm_slot; 976 values[i] = flt_stat->mf_dram_place; 977 } else { 978 synd[i] = 0; 979 dslot[i] = 0; 980 values[i] = 0; 981 } 982 } 983 984 fm_payload_set(ereport, MC_OPL_ERR_SYND, 985 DATA_TYPE_UINT32_ARRAY, nflts, synd, NULL); 986 987 fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT, 988 DATA_TYPE_UINT32_ARRAY, nflts, dslot, NULL); 989 990 fm_payload_set(ereport, MC_OPL_ERR_DRAM, 991 DATA_TYPE_UINT32_ARRAY, nflts, values, NULL); 992 993 device_path[0] = 0; 994 p = &device_path[0]; 995 sid[0] = 0; 996 s = &sid[0]; 997 ret = 0; 998 999 for (i = 0; i < nflts; i++) { 1000 int bank; 1001 1002 
flt_stat = mc_aflt->mflt_stat[i]; 1003 bank = flt_stat->mf_flt_maddr.ma_bank; 1004 ret = mc_set_mem_unum(p + strlen(p), blen, 1005 flt_stat->mf_flt_maddr.ma_bd, bank, flt_stat->mf_type, 1006 flt_stat->mf_dimm_slot); 1007 1008 if (ret != 0) { 1009 cmn_err(CE_WARN, 1010 "mc_ereport_post: Failed to determine the unum " 1011 "for board=%d bank=%d type=0x%x slot=0x%x", 1012 flt_stat->mf_flt_maddr.ma_bd, bank, 1013 flt_stat->mf_type, flt_stat->mf_dimm_slot); 1014 continue; 1015 } 1016 n = strlen(device_path); 1017 blen = MAXPATHLEN - n; 1018 p = &device_path[n]; 1019 if (i < (nflts - 1)) { 1020 snprintf(p, blen, " "); 1021 blen--; 1022 p++; 1023 } 1024 1025 if (ret == 0) { 1026 ret = mc_set_mem_sid(mc_aflt->mflt_mcp, s + strlen(s), 1027 blen, flt_stat->mf_flt_maddr.ma_bd, bank, 1028 flt_stat->mf_type, flt_stat->mf_dimm_slot); 1029 1030 } 1031 } 1032 1033 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 1034 NULL, device_path, (ret == 0) ? sid : NULL, 1035 (ret == 0) ? offset : (uint64_t)-1); 1036 1037 fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST, 1038 resource, NULL); 1039 1040 if (panicstr) { 1041 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1042 } else { 1043 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1044 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1045 fm_nvlist_destroy(detector, FM_NVA_FREE); 1046 fm_nvlist_destroy(resource, FM_NVA_FREE); 1047 } 1048 } 1049 1050 1051 static void 1052 mc_err_drain(mc_aflt_t *mc_aflt) 1053 { 1054 int rv; 1055 uint64_t pa = (uint64_t)(-1); 1056 int i; 1057 1058 MC_LOG("mc_err_drain: %s\n", 1059 mc_aflt->mflt_erpt_class); 1060 /* 1061 * we come here only when we have: 1062 * In mirror mode: CMPE, MUE, SUE 1063 * In normal mode: UE, Permanent CE 1064 */ 1065 for (i = 0; i < mc_aflt->mflt_nflts; i++) { 1066 rv = mcaddr_to_pa(mc_aflt->mflt_mcp, 1067 &(mc_aflt->mflt_stat[i]->mf_flt_maddr), &pa); 1068 1069 /* Ensure the pa is valid (not in isolated memory block) */ 1070 if (rv == 0 && pa_is_valid(mc_aflt->mflt_mcp, 
pa)) 1071 mc_aflt->mflt_stat[i]->mf_flt_paddr = pa; 1072 else 1073 mc_aflt->mflt_stat[i]->mf_flt_paddr = (uint64_t)-1; 1074 } 1075 1076 MC_LOG("mc_err_drain:pa = %lx\n", pa); 1077 1078 switch (page_retire_check(pa, NULL)) { 1079 case 0: 1080 case EAGAIN: 1081 MC_LOG("Page retired or pending\n"); 1082 return; 1083 case EIO: 1084 /* 1085 * Do page retirement except for the PCE case. 1086 * This is taken care by the OPL DE 1087 */ 1088 if (mc_aflt->mflt_stat[0]->mf_type != FLT_TYPE_PERMANENT_CE) { 1089 MC_LOG("offline page at pa %lx error %x\n", pa, 1090 mc_aflt->mflt_pr); 1091 (void) page_retire(pa, mc_aflt->mflt_pr); 1092 } 1093 break; 1094 case EINVAL: 1095 default: 1096 /* 1097 * Some memory do not have page structure so 1098 * we keep going in case of EINVAL. 1099 */ 1100 break; 1101 } 1102 1103 for (i = 0; i < mc_aflt->mflt_nflts; i++) { 1104 mc_aflt_t mc_aflt0; 1105 if (mc_aflt->mflt_stat[i]->mf_flt_paddr != (uint64_t)-1) { 1106 mc_aflt0 = *mc_aflt; 1107 mc_aflt0.mflt_nflts = 1; 1108 mc_aflt0.mflt_stat[0] = mc_aflt->mflt_stat[i]; 1109 mc_ereport_post(&mc_aflt0); 1110 } 1111 } 1112 } 1113 1114 /* 1115 * The restart address is actually defined in unit of PA[37:6] 1116 * the mac patrol will convert that to dimm offset. If the 1117 * address is not in the bank, it will continue to search for 1118 * the next PA that is within the bank. 1119 * 1120 * Also the mac patrol scans the dimms based on PA, not 1121 * dimm offset. 1122 */ 1123 static int 1124 restart_patrol(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr_info) 1125 { 1126 uint64_t pa; 1127 int rv; 1128 1129 if (rsaddr_info == NULL || (rsaddr_info->mi_valid == 0)) { 1130 MAC_PTRL_START(mcp, bank); 1131 return (0); 1132 } 1133 1134 rv = mcaddr_to_pa(mcp, &rsaddr_info->mi_restartaddr, &pa); 1135 if (rv != 0) { 1136 MC_LOG("cannot convert mcaddr to pa. 
use auto restart\n"); 1137 MAC_PTRL_START(mcp, bank); 1138 return (0); 1139 } 1140 1141 if (!mc_rangecheck_pa(mcp, pa)) { 1142 /* pa is not on this board, just retry */ 1143 cmn_err(CE_WARN, "restart_patrol: invalid address %lx " 1144 "on board %d\n", pa, mcp->mc_board_num); 1145 MAC_PTRL_START(mcp, bank); 1146 return (0); 1147 } 1148 1149 MC_LOG("restart_patrol: pa = %lx\n", pa); 1150 1151 if (!rsaddr_info->mi_injectrestart) { 1152 /* 1153 * For non-errorinjection restart we need to 1154 * determine if the current restart pa/page is 1155 * a "good" page. A "good" page is a page that 1156 * has not been page retired. If the current 1157 * page that contains the pa is "good", we will 1158 * do a HW auto restart and let HW patrol continue 1159 * where it last stopped. Most desired scenario. 1160 * 1161 * If the current page is not "good", we will advance 1162 * to the next page to find the next "good" page and 1163 * restart the patrol from there. 1164 */ 1165 int wrapcount = 0; 1166 uint64_t origpa = pa; 1167 while (wrapcount < 2) { 1168 if (!pa_is_valid(mcp, pa)) { 1169 /* 1170 * Not in physinstall - advance to the 1171 * next memory isolation blocksize 1172 */ 1173 MC_LOG("Invalid PA\n"); 1174 pa = roundup(pa + 1, mc_isolation_bsize); 1175 } else { 1176 int rv; 1177 if ((rv = page_retire_check(pa, NULL)) != 0 && 1178 rv != EAGAIN) { 1179 /* 1180 * The page is "good" (not retired), we will 1181 * use automatic HW restart algorithm if 1182 * this is the original current starting page 1183 */ 1184 if (pa == origpa) { 1185 MC_LOG("Page has no error. 
Auto restart\n"); 1186 MAC_PTRL_START(mcp, bank); 1187 return (0); 1188 } else { 1189 /* found a subsequent good page */ 1190 break; 1191 } 1192 } 1193 1194 /* 1195 * Skip to the next page 1196 */ 1197 pa = roundup(pa + 1, PAGESIZE); 1198 MC_LOG("Skipping bad page to %lx\n", pa); 1199 } 1200 1201 /* Check to see if we hit the end of the memory range */ 1202 if (pa >= (mcp->mc_start_address + mcp->mc_size)) { 1203 MC_LOG("Wrap around\n"); 1204 pa = mcp->mc_start_address; 1205 wrapcount++; 1206 } 1207 } 1208 1209 if (wrapcount > 1) { 1210 MC_LOG("Failed to find a good page. Just restart\n"); 1211 MAC_PTRL_START(mcp, bank); 1212 return (0); 1213 } 1214 } 1215 1216 /* 1217 * We reached here either: 1218 * 1. We are doing an error injection restart that specify 1219 * the exact pa/page to restart. OR 1220 * 2. We found a subsequent good page different from the 1221 * original restart pa/page. 1222 * Restart MAC patrol: PA[37:6] 1223 */ 1224 MC_LOG("restart at pa = %lx\n", pa); 1225 ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa)); 1226 MAC_PTRL_START_ADD(mcp, bank); 1227 1228 return (0); 1229 } 1230 1231 /* 1232 * Rewriting is used for two purposes. 1233 * - to correct the error in memory. 1234 * - to determine whether the error is permanent or intermittent. 1235 * It's done by writing the address in MAC_BANKm_REWRITE_ADD 1236 * and issuing REW_REQ command in MAC_BANKm_PTRL_CNRL. After that, 1237 * REW_END (and REW_CE/REW_UE if some error detected) is set when 1238 * rewrite operation is done. See 4.7.3 and 4.7.11 in Columbus2 PRM. 1239 * 1240 * Note that rewrite operation doesn't change RAW_UE to Marked UE. 1241 * Therefore, we use it only CE case. 
1242 */ 1243 static uint32_t 1244 do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr) 1245 { 1246 uint32_t cntl; 1247 int count = 0; 1248 1249 /* first wait to make sure PTRL_STATUS is 0 */ 1250 while (count++ < mc_max_rewrite_loop) { 1251 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 1252 if (!(cntl & MAC_CNTL_PTRL_STATUS)) 1253 break; 1254 drv_usecwait(mc_rewrite_delay); 1255 } 1256 if (count >= mc_max_rewrite_loop) 1257 goto bad; 1258 1259 count = 0; 1260 1261 ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr); 1262 MAC_REW_REQ(mcp, bank); 1263 1264 do { 1265 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 1266 if (count++ >= mc_max_rewrite_loop) { 1267 goto bad; 1268 } else { 1269 drv_usecwait(mc_rewrite_delay); 1270 } 1271 /* 1272 * If there are other MEMORY or PCI activities, this 1273 * will be BUSY, else it should be set immediately 1274 */ 1275 } while (!(cntl & MAC_CNTL_REW_END)); 1276 1277 MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS); 1278 return (cntl); 1279 bad: 1280 /* This is bad. 
Just reset the circuit */ 1281 cmn_err(CE_WARN, "mc-opl rewrite timeout on /LSB%d/B%d\n", 1282 mcp->mc_board_num, bank); 1283 cntl = MAC_CNTL_REW_END; 1284 MAC_CMD(mcp, bank, MAC_CNTL_PTRL_RESET); 1285 MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS); 1286 return (cntl); 1287 } 1288 void 1289 mc_process_scf_log(mc_opl_t *mcp) 1290 { 1291 int count; 1292 int n = 0; 1293 scf_log_t *p; 1294 int bank; 1295 1296 for (bank = 0; bank < BANKNUM_PER_SB; bank++) { 1297 while ((p = mcp->mc_scf_log[bank]) != NULL && 1298 (n < mc_max_errlog_processed)) { 1299 ASSERT(bank == p->sl_bank); 1300 count = 0; 1301 while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank)) 1302 & MAC_STATIC_ERR_VLD)) { 1303 if (count++ >= (mc_max_scf_loop)) { 1304 break; 1305 } 1306 drv_usecwait(mc_scf_delay); 1307 } 1308 1309 if (count < mc_max_scf_loop) { 1310 ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank), 1311 p->sl_err_log); 1312 1313 ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank), 1314 p->sl_err_add|MAC_STATIC_ERR_VLD); 1315 mcp->mc_scf_retry[bank] = 0; 1316 } else { 1317 /* if we try too many times, just drop the req */ 1318 if (mcp->mc_scf_retry[bank]++ <= mc_max_scf_retry) { 1319 return; 1320 } else { 1321 if ((++mc_pce_dropped & 0xff) == 0) { 1322 cmn_err(CE_WARN, 1323 "Cannot report Permanent CE to SCF\n"); 1324 } 1325 } 1326 } 1327 n++; 1328 mcp->mc_scf_log[bank] = p->sl_next; 1329 mcp->mc_scf_total[bank]--; 1330 ASSERT(mcp->mc_scf_total[bank] >= 0); 1331 kmem_free(p, sizeof (scf_log_t)); 1332 } 1333 } 1334 } 1335 void 1336 mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank) 1337 { 1338 scf_log_t *p; 1339 1340 if (mcp->mc_scf_total[bank] >= mc_max_scf_logs) { 1341 if ((++mc_pce_dropped & 0xff) == 0) { 1342 cmn_err(CE_WARN, "Too many Permanent CE requests.\n"); 1343 } 1344 return; 1345 } 1346 p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP); 1347 p->sl_next = 0; 1348 p->sl_err_add = flt_stat->mf_err_add; 1349 p->sl_err_log = flt_stat->mf_err_log; 1350 p->sl_bank = bank; 1351 1352 if 
(mcp->mc_scf_log[bank] == NULL) { 1353 /* 1354 * we rely on mc_scf_log to detect NULL queue. 1355 * mc_scf_log_tail is irrelevant is such case. 1356 */ 1357 mcp->mc_scf_log_tail[bank] = mcp->mc_scf_log[bank] = p; 1358 } else { 1359 mcp->mc_scf_log_tail[bank]->sl_next = p; 1360 mcp->mc_scf_log_tail[bank] = p; 1361 } 1362 mcp->mc_scf_total[bank]++; 1363 } 1364 /* 1365 * This routine determines what kind of CE happens, intermittent 1366 * or permanent as follows. (See 4.7.3 in Columbus2 PRM.) 1367 * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register. 1368 * - If CE is still detected on the same address even after doing 1369 * rewrite operation twice, it is determined as permanent error. 1370 * - If error is not detected anymore, it is determined as intermittent 1371 * error. 1372 * - If UE is detected due to rewrite operation, it should be treated 1373 * as UE. 1374 */ 1375 1376 /* ARGSUSED */ 1377 static void 1378 mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error) 1379 { 1380 uint32_t cntl; 1381 int i; 1382 1383 flt_stat->mf_type = FLT_TYPE_PERMANENT_CE; 1384 /* 1385 * rewrite request 1st time reads and correct error data 1386 * and write to DIMM. 2nd rewrite request must be issued 1387 * after REW_CE/UE/END is 0. When the 2nd request is completed, 1388 * if REW_CE = 1, then it is permanent CE. 1389 */ 1390 for (i = 0; i < 2; i++) { 1391 cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add); 1392 /* 1393 * If the error becomes UE or CMPE 1394 * we return to the caller immediately. 
1395 */ 1396 if (cntl & MAC_CNTL_REW_UE) { 1397 if (ptrl_error) 1398 flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE; 1399 else 1400 flt_stat->mf_cntl |= MAC_CNTL_MI_UE; 1401 flt_stat->mf_type = FLT_TYPE_UE; 1402 return; 1403 } 1404 if (cntl & MAC_CNTL_REW_CMPE) { 1405 if (ptrl_error) 1406 flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE; 1407 else 1408 flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE; 1409 flt_stat->mf_type = FLT_TYPE_CMPE; 1410 return; 1411 } 1412 } 1413 if (!(cntl & MAC_CNTL_REW_CE)) { 1414 flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE; 1415 } 1416 1417 if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 1418 /* report PERMANENT_CE to SP via SCF */ 1419 if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) { 1420 mc_queue_scf_log(mcp, flt_stat, bank); 1421 } 1422 } 1423 } 1424 1425 #define IS_CMPE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE :\ 1426 MAC_CNTL_MI_CMPE)) 1427 #define IS_UE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE)) 1428 #define IS_CE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE)) 1429 #define IS_OK(cntl, f) (!((cntl) & ((f) ? MAC_CNTL_PTRL_ERRS : \ 1430 MAC_CNTL_MI_ERRS))) 1431 1432 1433 static int 1434 IS_CE_ONLY(uint32_t cntl, int ptrl_error) 1435 { 1436 if (ptrl_error) { 1437 return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE); 1438 } else { 1439 return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE); 1440 } 1441 } 1442 1443 void 1444 mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value) 1445 { 1446 int ebank = (IS_MIRROR(mcp, bank)) ? 
MIRROR_IDX(bank) : bank; 1447 1448 if (mcp->mc_speedup_period[ebank] > 0) 1449 value |= mc_max_speed; 1450 else 1451 value |= mcp->mc_speed; 1452 ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value); 1453 } 1454 1455 static void 1456 mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat) 1457 { 1458 flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1459 MAC_CNTL_PTRL_ERRS; 1460 flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank)); 1461 flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank)); 1462 flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num; 1463 flt_stat->mf_flt_maddr.ma_bank = bank; 1464 flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add; 1465 } 1466 1467 static void 1468 mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat) 1469 { 1470 uint32_t status, old_status; 1471 1472 status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1473 MAC_CNTL_MI_ERRS; 1474 old_status = 0; 1475 1476 /* we keep reading until the status is stable */ 1477 while (old_status != status) { 1478 old_status = status; 1479 flt_stat->mf_err_add = 1480 LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank)); 1481 flt_stat->mf_err_log = 1482 LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank)); 1483 status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1484 MAC_CNTL_MI_ERRS; 1485 if (status == old_status) { 1486 break; 1487 } 1488 } 1489 1490 flt_stat->mf_cntl = status; 1491 flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num; 1492 flt_stat->mf_flt_maddr.ma_bank = bank; 1493 flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add; 1494 } 1495 1496 1497 /* 1498 * Error philosophy for mirror mode: 1499 * 1500 * PTRL (The error address for both banks are same, since ptrl stops if it 1501 * detects error.) 1502 * - Compaire error Report CMPE. 1503 * 1504 * - UE-UE Report MUE. No rewrite. 1505 * 1506 * - UE-* UE-(CE/OK). Rewrite to scrub UE. Report SUE. 1507 * 1508 * - CE-* CE-(CE/OK). Scrub to determine if CE is permanent. 1509 * If CE is permanent, inform SCF. Once for each 1510 * Dimm. 
If CE becomes UE or CMPE, go back to above. 1511 * 1512 * 1513 * MI (The error addresses for each bank are the same or different.) 1514 * - Compair error If addresses are the same. Just CMPE. 1515 * If addresses are different (this could happen 1516 * as a result of scrubbing. Report each seperately. 1517 * Only report error info on each side. 1518 * 1519 * - UE-UE Addresses are the same. Report MUE. 1520 * Addresses are different. Report SUE on each bank. 1521 * Rewrite to clear UE. 1522 * 1523 * - UE-* UE-(CE/OK) 1524 * Rewrite to clear UE. Report SUE for the bank. 1525 * 1526 * - CE-* CE-(CE/OK). Scrub to determine if CE is permanent. 1527 * If CE becomes UE or CMPE, go back to above. 1528 * 1529 */ 1530 1531 static int 1532 mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat) 1533 { 1534 int ptrl_error = mc_aflt->mflt_is_ptrl; 1535 int i; 1536 int rv = 0; 1537 1538 MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n", 1539 flt_stat[0].mf_cntl, flt_stat[1].mf_cntl); 1540 1541 if (ptrl_error) { 1542 if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) 1543 & MAC_CNTL_PTRL_ERRS) == 0) 1544 return (0); 1545 } else { 1546 if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) 1547 & MAC_CNTL_MI_ERRS) == 0) 1548 return (0); 1549 } 1550 1551 /* 1552 * First we take care of the case of CE 1553 * because they can become UE or CMPE 1554 */ 1555 for (i = 0; i < 2; i++) { 1556 if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) { 1557 MC_LOG("CE detected on bank %d\n", 1558 flt_stat[i].mf_flt_maddr.ma_bank); 1559 mc_scrub_ce(mcp, flt_stat[i].mf_flt_maddr.ma_bank, 1560 &flt_stat[i], ptrl_error); 1561 rv = 1; 1562 } 1563 } 1564 1565 /* The above scrubbing can turn CE into UE or CMPE */ 1566 1567 /* 1568 * Now we distinguish two cases: same address or not 1569 * the same address. It might seem more intuitive to 1570 * distinguish PTRL v.s. MI error but it is more 1571 * complicated that way. 
1572 */ 1573 1574 if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) { 1575 1576 if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) || 1577 IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) { 1578 flt_stat[0].mf_type = FLT_TYPE_CMPE; 1579 flt_stat[1].mf_type = FLT_TYPE_CMPE; 1580 mc_aflt->mflt_erpt_class = MC_OPL_CMPE; 1581 MC_LOG("cmpe error detected\n"); 1582 mc_aflt->mflt_nflts = 2; 1583 mc_aflt->mflt_stat[0] = &flt_stat[0]; 1584 mc_aflt->mflt_stat[1] = &flt_stat[1]; 1585 mc_aflt->mflt_pr = PR_UE; 1586 mc_err_drain(mc_aflt); 1587 return (1); 1588 } 1589 1590 if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) && 1591 IS_UE(flt_stat[1].mf_cntl, ptrl_error)) { 1592 /* Both side are UE's */ 1593 1594 MAC_SET_ERRLOG_INFO(&flt_stat[0]); 1595 MAC_SET_ERRLOG_INFO(&flt_stat[1]); 1596 MC_LOG("MUE detected\n"); 1597 flt_stat[0].mf_type = FLT_TYPE_MUE; 1598 flt_stat[1].mf_type = FLT_TYPE_MUE; 1599 mc_aflt->mflt_erpt_class = MC_OPL_MUE; 1600 mc_aflt->mflt_nflts = 2; 1601 mc_aflt->mflt_stat[0] = &flt_stat[0]; 1602 mc_aflt->mflt_stat[1] = &flt_stat[1]; 1603 mc_aflt->mflt_pr = PR_UE; 1604 mc_err_drain(mc_aflt); 1605 return (1); 1606 } 1607 1608 /* Now the only case is UE/CE, UE/OK, or don't care */ 1609 for (i = 0; i < 2; i++) { 1610 if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) { 1611 1612 /* rewrite can clear the one side UE error */ 1613 1614 if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) { 1615 (void) do_rewrite(mcp, 1616 flt_stat[i].mf_flt_maddr.ma_bank, 1617 flt_stat[i].mf_flt_maddr.ma_dimm_addr); 1618 } 1619 flt_stat[i].mf_type = FLT_TYPE_UE; 1620 MAC_SET_ERRLOG_INFO(&flt_stat[i]); 1621 mc_aflt->mflt_erpt_class = MC_OPL_SUE; 1622 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1623 mc_aflt->mflt_nflts = 1; 1624 mc_aflt->mflt_pr = PR_MCE; 1625 mc_err_drain(mc_aflt); 1626 /* Once we hit a UE/CE or UE/OK case, done */ 1627 return (1); 1628 } 1629 } 1630 1631 } else { 1632 /* 1633 * addresses are different. That means errors 1634 * on the 2 banks are not related at all. 
1635 */ 1636 for (i = 0; i < 2; i++) { 1637 if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) { 1638 flt_stat[i].mf_type = FLT_TYPE_CMPE; 1639 mc_aflt->mflt_erpt_class = MC_OPL_CMPE; 1640 MC_LOG("cmpe error detected\n"); 1641 mc_aflt->mflt_nflts = 1; 1642 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1643 mc_aflt->mflt_pr = PR_UE; 1644 mc_err_drain(mc_aflt); 1645 /* no more report on this bank */ 1646 flt_stat[i].mf_cntl = 0; 1647 rv = 1; 1648 } 1649 } 1650 1651 /* rewrite can clear the one side UE error */ 1652 1653 for (i = 0; i < 2; i++) { 1654 if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) { 1655 (void) do_rewrite(mcp, 1656 flt_stat[i].mf_flt_maddr.ma_bank, 1657 flt_stat[i].mf_flt_maddr.ma_dimm_addr); 1658 flt_stat[i].mf_type = FLT_TYPE_UE; 1659 MAC_SET_ERRLOG_INFO(&flt_stat[i]); 1660 mc_aflt->mflt_erpt_class = MC_OPL_SUE; 1661 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1662 mc_aflt->mflt_nflts = 1; 1663 mc_aflt->mflt_pr = PR_MCE; 1664 mc_err_drain(mc_aflt); 1665 rv = 1; 1666 } 1667 } 1668 } 1669 return (rv); 1670 } 1671 static void 1672 mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr) 1673 { 1674 mc_aflt_t mc_aflt; 1675 mc_flt_stat_t flt_stat[2], mi_flt_stat[2]; 1676 int i; 1677 int mi_valid; 1678 1679 ASSERT(rsaddr); 1680 1681 bzero(&mc_aflt, sizeof (mc_aflt_t)); 1682 bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t)); 1683 bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t)); 1684 1685 mc_aflt.mflt_mcp = mcp; 1686 mc_aflt.mflt_id = gethrtime(); 1687 1688 /* Now read all the registers into flt_stat */ 1689 1690 for (i = 0; i < 2; i++) { 1691 MC_LOG("Reading registers of bank %d\n", bank); 1692 /* patrol registers */ 1693 mc_read_ptrl_reg(mcp, bank, &flt_stat[i]); 1694 1695 /* 1696 * In mirror mode, it is possible that only one bank 1697 * may report the error. We need to check for it to 1698 * ensure we pick the right addr value for patrol restart. 1699 * Note that if both banks reported errors, we pick the 1700 * 2nd one. 
Both banks should reported the same error address. 1701 */ 1702 if (flt_stat[i].mf_cntl & MAC_CNTL_PTRL_ERRS) 1703 rsaddr->mi_restartaddr = flt_stat[i].mf_flt_maddr; 1704 1705 MC_LOG("ptrl registers cntl %x add %x log %x\n", 1706 flt_stat[i].mf_cntl, 1707 flt_stat[i].mf_err_add, 1708 flt_stat[i].mf_err_log); 1709 1710 /* MI registers */ 1711 mc_read_mi_reg(mcp, bank, &mi_flt_stat[i]); 1712 1713 MC_LOG("MI registers cntl %x add %x log %x\n", 1714 mi_flt_stat[i].mf_cntl, 1715 mi_flt_stat[i].mf_err_add, 1716 mi_flt_stat[i].mf_err_log); 1717 1718 bank = bank^1; 1719 } 1720 1721 /* clear errors once we read all the registers */ 1722 MAC_CLEAR_ERRS(mcp, bank, 1723 (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1724 1725 MAC_CLEAR_ERRS(mcp, bank ^ 1, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1726 1727 /* Process MI errors first */ 1728 1729 /* if not error mode, cntl1 is 0 */ 1730 if ((mi_flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) || 1731 (mi_flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID)) 1732 mi_flt_stat[0].mf_cntl = 0; 1733 1734 if ((mi_flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) || 1735 (mi_flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID)) 1736 mi_flt_stat[1].mf_cntl = 0; 1737 1738 mc_aflt.mflt_is_ptrl = 0; 1739 mi_valid = mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]); 1740 1741 if ((((flt_stat[0].mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1742 MAC_CNTL_PTRL_ERR_SHIFT) == 1743 ((mi_flt_stat[0].mf_cntl & MAC_CNTL_MI_ERRS) >> 1744 MAC_CNTL_MI_ERR_SHIFT)) && 1745 (flt_stat[0].mf_err_add == mi_flt_stat[0].mf_err_add) && 1746 (((flt_stat[1].mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1747 MAC_CNTL_PTRL_ERR_SHIFT) == 1748 ((mi_flt_stat[1].mf_cntl & MAC_CNTL_MI_ERRS) >> 1749 MAC_CNTL_MI_ERR_SHIFT)) && 1750 (flt_stat[1].mf_err_add == mi_flt_stat[1].mf_err_add)) { 1751 #ifdef DEBUG 1752 MC_LOG("discarding PTRL error because " 1753 "it is the same as MI\n"); 1754 #endif 1755 rsaddr->mi_valid = mi_valid; 1756 return; 1757 } 1758 /* if not error mode, cntl1 is 0 */ 1759 if ((flt_stat[0].mf_err_add & 
MAC_ERR_ADD_INVALID) || 1760 (flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID)) 1761 flt_stat[0].mf_cntl = 0; 1762 1763 if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) || 1764 (flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID)) 1765 flt_stat[1].mf_cntl = 0; 1766 1767 mc_aflt.mflt_is_ptrl = 1; 1768 rsaddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]); 1769 } 1770 static int 1771 mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt, 1772 mc_flt_stat_t *flt_stat) 1773 { 1774 int ptrl_error = mc_aflt->mflt_is_ptrl; 1775 int rv = 0; 1776 1777 mc_aflt->mflt_erpt_class = NULL; 1778 if (IS_UE(flt_stat->mf_cntl, ptrl_error)) { 1779 MC_LOG("UE deteceted\n"); 1780 flt_stat->mf_type = FLT_TYPE_UE; 1781 mc_aflt->mflt_erpt_class = MC_OPL_UE; 1782 mc_aflt->mflt_pr = PR_UE; 1783 MAC_SET_ERRLOG_INFO(flt_stat); 1784 rv = 1; 1785 } else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) { 1786 MC_LOG("CE deteceted\n"); 1787 MAC_SET_ERRLOG_INFO(flt_stat); 1788 1789 /* Error type can change after scrubing */ 1790 mc_scrub_ce(mcp, bank, flt_stat, ptrl_error); 1791 1792 if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 1793 mc_aflt->mflt_erpt_class = MC_OPL_CE; 1794 mc_aflt->mflt_pr = PR_MCE; 1795 } else if (flt_stat->mf_type == FLT_TYPE_UE) { 1796 mc_aflt->mflt_erpt_class = MC_OPL_UE; 1797 mc_aflt->mflt_pr = PR_UE; 1798 } 1799 rv = 1; 1800 } 1801 MC_LOG("mc_process_error: fault type %x erpt %s\n", 1802 flt_stat->mf_type, 1803 mc_aflt->mflt_erpt_class); 1804 if (mc_aflt->mflt_erpt_class) { 1805 mc_aflt->mflt_stat[0] = flt_stat; 1806 mc_aflt->mflt_nflts = 1; 1807 mc_err_drain(mc_aflt); 1808 } 1809 return (rv); 1810 } 1811 1812 static void 1813 mc_error_handler(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr) 1814 { 1815 mc_aflt_t mc_aflt; 1816 mc_flt_stat_t flt_stat, mi_flt_stat; 1817 int mi_valid; 1818 1819 bzero(&mc_aflt, sizeof (mc_aflt_t)); 1820 bzero(&flt_stat, sizeof (mc_flt_stat_t)); 1821 bzero(&mi_flt_stat, sizeof (mc_flt_stat_t)); 1822 1823 mc_aflt.mflt_mcp = mcp; 
1824 mc_aflt.mflt_id = gethrtime(); 1825 1826 /* patrol registers */ 1827 mc_read_ptrl_reg(mcp, bank, &flt_stat); 1828 1829 ASSERT(rsaddr); 1830 rsaddr->mi_restartaddr = flt_stat.mf_flt_maddr; 1831 1832 MC_LOG("ptrl registers cntl %x add %x log %x\n", 1833 flt_stat.mf_cntl, 1834 flt_stat.mf_err_add, 1835 flt_stat.mf_err_log); 1836 1837 /* MI registers */ 1838 mc_read_mi_reg(mcp, bank, &mi_flt_stat); 1839 1840 1841 MC_LOG("MI registers cntl %x add %x log %x\n", 1842 mi_flt_stat.mf_cntl, 1843 mi_flt_stat.mf_err_add, 1844 mi_flt_stat.mf_err_log); 1845 1846 /* clear errors once we read all the registers */ 1847 MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1848 1849 mc_aflt.mflt_is_ptrl = 0; 1850 if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) && 1851 ((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) && 1852 ((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) { 1853 mi_valid = mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat); 1854 } 1855 1856 if ((((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1857 MAC_CNTL_PTRL_ERR_SHIFT) == 1858 ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) >> 1859 MAC_CNTL_MI_ERR_SHIFT)) && 1860 (flt_stat.mf_err_add == mi_flt_stat.mf_err_add)) { 1861 #ifdef DEBUG 1862 MC_LOG("discarding PTRL error because " 1863 "it is the same as MI\n"); 1864 #endif 1865 rsaddr->mi_valid = mi_valid; 1866 return; 1867 } 1868 1869 mc_aflt.mflt_is_ptrl = 1; 1870 if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) && 1871 ((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) && 1872 ((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) { 1873 rsaddr->mi_valid = mc_process_error(mcp, bank, 1874 &mc_aflt, &flt_stat); 1875 } 1876 } 1877 /* 1878 * memory patrol error handling algorithm: 1879 * timeout() is used to do periodic polling 1880 * This is the flow chart. 
1881 * timeout -> 1882 * mc_check_errors() 1883 * if memory bank is installed, read the status register 1884 * if any error bit is set, 1885 * -> mc_error_handler() 1886 * -> read all error regsiters 1887 * -> mc_process_error() 1888 * determine error type 1889 * rewrite to clear error or scrub to determine CE type 1890 * inform SCF on permanent CE 1891 * -> mc_err_drain 1892 * page offline processing 1893 * -> mc_ereport_post() 1894 */ 1895 1896 static void 1897 mc_check_errors_func(mc_opl_t *mcp) 1898 { 1899 mc_rsaddr_info_t rsaddr_info; 1900 int i, error_count = 0; 1901 uint32_t stat, cntl; 1902 int running; 1903 int wrapped; 1904 int ebk; 1905 1906 /* 1907 * scan errors. 1908 */ 1909 if (mcp->mc_status & MC_MEMORYLESS) 1910 return; 1911 1912 for (i = 0; i < BANKNUM_PER_SB; i++) { 1913 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 1914 stat = ldphysio(MAC_PTRL_STAT(mcp, i)); 1915 cntl = ldphysio(MAC_PTRL_CNTL(mcp, i)); 1916 running = cntl & MAC_CNTL_PTRL_START; 1917 wrapped = cntl & MAC_CNTL_PTRL_ADD_MAX; 1918 1919 /* Compute the effective bank idx */ 1920 ebk = (IS_MIRROR(mcp, i)) ? MIRROR_IDX(i) : i; 1921 1922 if (mc_debug_show_all || stat) { 1923 MC_LOG("/LSB%d/B%d stat %x cntl %x\n", 1924 mcp->mc_board_num, i, 1925 stat, cntl); 1926 } 1927 1928 /* 1929 * Update stats and reset flag if the HW patrol 1930 * wrapped around in its scan. 1931 */ 1932 if (wrapped) { 1933 MAC_CLEAR_MAX(mcp, i); 1934 mcp->mc_period[ebk]++; 1935 if (IS_MIRROR(mcp, i)) 1936 MC_LOG("mirror mc period %ld on " 1937 "/LSB%d/B%d\n", mcp->mc_period[ebk], 1938 mcp->mc_board_num, i); 1939 else { 1940 MC_LOG("mc period %ld on " 1941 "/LSB%d/B%d\n", mcp->mc_period[ebk], 1942 mcp->mc_board_num, i); 1943 } 1944 } 1945 1946 if (running) { 1947 /* 1948 * Mac patrol HW is still running. 1949 * Normally when an error is detected, 1950 * the HW patrol will stop so that we 1951 * can collect error data for reporting. 
1952 * Certain errors (MI errors) detected may not 1953 * cause the HW patrol to stop which is a 1954 * problem since we cannot read error data while 1955 * the HW patrol is running. SW is not allowed 1956 * to stop the HW patrol while it is running 1957 * as it may cause HW inconsistency. This is 1958 * described in a HW errata. 1959 * In situations where we detected errors 1960 * that may not cause the HW patrol to stop. 1961 * We speed up the HW patrol scanning in 1962 * the hope that it will find the 'real' PTRL 1963 * errors associated with the previous errors 1964 * causing the HW to finally stop so that we 1965 * can do the reporting. 1966 */ 1967 /* 1968 * Check to see if we did speed up 1969 * the HW patrol due to previous errors 1970 * detected that did not cause the patrol 1971 * to stop. We only do it if HW patrol scan 1972 * wrapped (counted as completing a 'period'). 1973 */ 1974 if (mcp->mc_speedup_period[ebk] > 0) { 1975 if (wrapped && 1976 (--mcp->mc_speedup_period[ebk] == 0)) { 1977 /* 1978 * We did try to speed up. 1979 * The speed up period has expired 1980 * and the HW patrol is still running. 1981 * The errors must be intermittent. 1982 * We have no choice but to ignore 1983 * them, reset the scan speed to normal 1984 * and clear the MI error bits. For 1985 * mirror mode, we need to clear errors 1986 * on both banks. 1987 */ 1988 MC_LOG("Clearing MI errors\n"); 1989 MAC_CLEAR_ERRS(mcp, i, 1990 MAC_CNTL_MI_ERRS); 1991 1992 if (IS_MIRROR(mcp, i)) { 1993 MC_LOG("Clearing Mirror MI errs\n"); 1994 MAC_CLEAR_ERRS(mcp, i^1, 1995 MAC_CNTL_MI_ERRS); 1996 } 1997 } 1998 } else if (stat & MAC_STAT_MI_ERRS) { 1999 /* 2000 * MI errors detected but we cannot 2001 * report them since the HW patrol 2002 * is still running. 2003 * We will attempt to speed up the 2004 * scanning and hopefully the HW 2005 * can detect PRTL errors at the same 2006 * location that cause the HW patrol 2007 * to stop. 
2008 */ 2009 mcp->mc_speedup_period[ebk] = 2; 2010 MAC_CMD(mcp, i, 0); 2011 } 2012 } else if (stat & (MAC_STAT_PTRL_ERRS | 2013 MAC_STAT_MI_ERRS)) { 2014 /* 2015 * HW Patrol has stopped and we found errors. 2016 * Proceed to collect and report error info. 2017 */ 2018 mcp->mc_speedup_period[ebk] = 0; 2019 rsaddr_info.mi_valid = 0; 2020 rsaddr_info.mi_injectrestart = 0; 2021 if (IS_MIRROR(mcp, i)) { 2022 mc_error_handler_mir(mcp, i, &rsaddr_info); 2023 } else { 2024 mc_error_handler(mcp, i, &rsaddr_info); 2025 } 2026 2027 error_count++; 2028 restart_patrol(mcp, i, &rsaddr_info); 2029 } else { 2030 /* 2031 * HW patrol scan has apparently stopped 2032 * but no errors detected/flagged. 2033 * Restart the HW patrol just to be sure. 2034 * In mirror mode, the odd bank might have 2035 * reported errors that caused the patrol to 2036 * stop. We'll defer the restart to the odd 2037 * bank in this case. 2038 */ 2039 if (!IS_MIRROR(mcp, i) || (i & 0x1)) 2040 restart_patrol(mcp, i, NULL); 2041 } 2042 } 2043 } 2044 if (error_count > 0) 2045 mcp->mc_last_error += error_count; 2046 else 2047 mcp->mc_last_error = 0; 2048 } 2049 2050 /* 2051 * mc_polling -- Check errors for only one instance, 2052 * but process errors for all instances to make sure we drain the errors 2053 * faster than they can be accumulated. 2054 * 2055 * Polling on each board should be done only once per each 2056 * mc_patrol_interval_sec. This is equivalent to setting mc_tick_left 2057 * to OPL_MAX_BOARDS and decrement by 1 on each timeout. 2058 * Once mc_tick_left becomes negative, the board becomes a candidate 2059 * for polling because it has waited for at least 2060 * mc_patrol_interval_sec's long. If mc_timeout_period is calculated 2061 * differently, this has to beupdated accordingly. 
2062 */ 2063 2064 static void 2065 mc_polling(void) 2066 { 2067 int i, scan_error; 2068 mc_opl_t *mcp; 2069 2070 2071 scan_error = 1; 2072 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2073 mutex_enter(&mcmutex); 2074 if ((mcp = mc_instances[i]) == NULL) { 2075 mutex_exit(&mcmutex); 2076 continue; 2077 } 2078 mutex_enter(&mcp->mc_lock); 2079 mutex_exit(&mcmutex); 2080 if (!(mcp->mc_status & MC_POLL_RUNNING)) { 2081 mutex_exit(&mcp->mc_lock); 2082 continue; 2083 } 2084 if (scan_error && mcp->mc_tick_left <= 0) { 2085 mc_check_errors_func((void *)mcp); 2086 mcp->mc_tick_left = OPL_MAX_BOARDS; 2087 scan_error = 0; 2088 } else { 2089 mcp->mc_tick_left--; 2090 } 2091 mc_process_scf_log(mcp); 2092 mutex_exit(&mcp->mc_lock); 2093 } 2094 } 2095 2096 static void 2097 get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr) 2098 { 2099 maddr->ma_bd = mcp->mc_board_num; 2100 maddr->ma_bank = bank; 2101 maddr->ma_dimm_addr = 0; 2102 } 2103 2104 typedef struct mc_mem_range { 2105 uint64_t addr; 2106 uint64_t size; 2107 } mc_mem_range_t; 2108 2109 static int 2110 get_base_address(mc_opl_t *mcp) 2111 { 2112 mc_mem_range_t *mem_range; 2113 int len; 2114 2115 if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2116 "sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) { 2117 return (DDI_FAILURE); 2118 } 2119 2120 mcp->mc_start_address = mem_range->addr; 2121 mcp->mc_size = mem_range->size; 2122 2123 kmem_free(mem_range, len); 2124 return (DDI_SUCCESS); 2125 } 2126 2127 struct mc_addr_spec { 2128 uint32_t bank; 2129 uint32_t phys_hi; 2130 uint32_t phys_lo; 2131 }; 2132 2133 #define REGS_PA(m, i) ((((uint64_t)m[i].phys_hi)<<32) | m[i].phys_lo) 2134 2135 static char *mc_tbl_name[] = { 2136 "cs0-mc-pa-trans-table", 2137 "cs1-mc-pa-trans-table" 2138 }; 2139 2140 /* 2141 * This routine performs a rangecheck for a given PA 2142 * to see if it belongs to the memory range for this board. 
2143 * Return 1 if it is valid (within the range) and 0 otherwise 2144 */ 2145 static int 2146 mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa) 2147 { 2148 if ((pa < mcp->mc_start_address) || 2149 (mcp->mc_start_address + mcp->mc_size <= pa)) 2150 return (0); 2151 else 2152 return (1); 2153 } 2154 2155 static void 2156 mc_memlist_delete(struct memlist *mlist) 2157 { 2158 struct memlist *ml; 2159 2160 for (ml = mlist; ml; ml = mlist) { 2161 mlist = ml->next; 2162 kmem_free(ml, sizeof (struct memlist)); 2163 } 2164 } 2165 2166 static struct memlist * 2167 mc_memlist_dup(struct memlist *mlist) 2168 { 2169 struct memlist *hl = NULL, *tl, **mlp; 2170 2171 if (mlist == NULL) 2172 return (NULL); 2173 2174 mlp = &hl; 2175 tl = *mlp; 2176 for (; mlist; mlist = mlist->next) { 2177 *mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP); 2178 (*mlp)->address = mlist->address; 2179 (*mlp)->size = mlist->size; 2180 (*mlp)->prev = tl; 2181 tl = *mlp; 2182 mlp = &((*mlp)->next); 2183 } 2184 *mlp = NULL; 2185 2186 return (hl); 2187 } 2188 2189 2190 static struct memlist * 2191 mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len) 2192 { 2193 uint64_t end; 2194 struct memlist *ml, *tl, *nlp; 2195 2196 if (mlist == NULL) 2197 return (NULL); 2198 2199 end = base + len; 2200 if ((end <= mlist->address) || (base == end)) 2201 return (mlist); 2202 2203 for (tl = ml = mlist; ml; tl = ml, ml = nlp) { 2204 uint64_t mend; 2205 2206 nlp = ml->next; 2207 2208 if (end <= ml->address) 2209 break; 2210 2211 mend = ml->address + ml->size; 2212 if (base < mend) { 2213 if (base <= ml->address) { 2214 ml->address = end; 2215 if (end >= mend) 2216 ml->size = 0ull; 2217 else 2218 ml->size = mend - ml->address; 2219 } else { 2220 ml->size = base - ml->address; 2221 if (end < mend) { 2222 struct memlist *nl; 2223 /* 2224 * splitting an memlist entry. 
2225 */ 2226 nl = kmem_alloc(sizeof (struct memlist), 2227 KM_SLEEP); 2228 nl->address = end; 2229 nl->size = mend - nl->address; 2230 if ((nl->next = nlp) != NULL) 2231 nlp->prev = nl; 2232 nl->prev = ml; 2233 ml->next = nl; 2234 nlp = nl; 2235 } 2236 } 2237 if (ml->size == 0ull) { 2238 if (ml == mlist) { 2239 if ((mlist = nlp) != NULL) 2240 nlp->prev = NULL; 2241 kmem_free(ml, sizeof (struct memlist)); 2242 if (mlist == NULL) 2243 break; 2244 ml = nlp; 2245 } else { 2246 if ((tl->next = nlp) != NULL) 2247 nlp->prev = tl; 2248 kmem_free(ml, sizeof (struct memlist)); 2249 ml = tl; 2250 } 2251 } 2252 } 2253 } 2254 2255 return (mlist); 2256 } 2257 2258 static void 2259 mc_get_mlist(mc_opl_t *mcp) 2260 { 2261 struct memlist *mlist; 2262 2263 memlist_read_lock(); 2264 mlist = mc_memlist_dup(phys_install); 2265 memlist_read_unlock(); 2266 2267 if (mlist) { 2268 mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address); 2269 } 2270 2271 if (mlist) { 2272 uint64_t startpa, endpa; 2273 2274 startpa = mcp->mc_start_address + mcp->mc_size; 2275 endpa = ptob(physmax + 1); 2276 if (endpa > startpa) { 2277 mlist = mc_memlist_del_span(mlist, 2278 startpa, endpa - startpa); 2279 } 2280 } 2281 2282 if (mlist) { 2283 mcp->mlist = mlist; 2284 } 2285 } 2286 2287 int 2288 mc_board_add(mc_opl_t *mcp) 2289 { 2290 struct mc_addr_spec *macaddr; 2291 cs_status_t *cs_status; 2292 int len, len1, i, bk, cc; 2293 mc_rsaddr_info_t rsaddr; 2294 uint32_t mirr; 2295 int nbanks = 0; 2296 uint64_t nbytes = 0; 2297 2298 /* 2299 * Get configurations from "pseudo-mc" node which includes: 2300 * board# : LSB number 2301 * mac-addr : physical base address of MAC registers 2302 * csX-mac-pa-trans-table: translation table from DIMM address 2303 * to physical address or vice versa. 
2304 */ 2305 mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip, 2306 DDI_PROP_DONTPASS, "board#", -1); 2307 2308 if (mcp->mc_board_num == -1) { 2309 return (DDI_FAILURE); 2310 } 2311 2312 /* 2313 * Get start address in this CAB. It can be gotten from 2314 * "sb-mem-ranges" property. 2315 */ 2316 2317 if (get_base_address(mcp) == DDI_FAILURE) { 2318 return (DDI_FAILURE); 2319 } 2320 /* get mac-pa trans tables */ 2321 for (i = 0; i < MC_TT_CS; i++) { 2322 len = MC_TT_ENTRIES; 2323 cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip, 2324 DDI_PROP_DONTPASS, mc_tbl_name[i], 2325 (caddr_t)mcp->mc_trans_table[i], &len); 2326 2327 if (cc != DDI_SUCCESS) { 2328 bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES); 2329 } 2330 } 2331 mcp->mlist = NULL; 2332 2333 mc_get_mlist(mcp); 2334 2335 /* initialize bank informations */ 2336 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2337 "mc-addr", (caddr_t)&macaddr, &len); 2338 if (cc != DDI_SUCCESS) { 2339 cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc); 2340 return (DDI_FAILURE); 2341 } 2342 2343 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2344 "cs-status", (caddr_t)&cs_status, &len1); 2345 2346 if (cc != DDI_SUCCESS) { 2347 if (len > 0) 2348 kmem_free(macaddr, len); 2349 cmn_err(CE_WARN, "Cannot get cs-status. 
err=%d\n", cc); 2350 return (DDI_FAILURE); 2351 } 2352 2353 mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL); 2354 2355 for (i = 0; i < len1 / sizeof (cs_status_t); i++) { 2356 nbytes += ((uint64_t)cs_status[i].cs_avail_hi << 32) | 2357 ((uint64_t)cs_status[i].cs_avail_low); 2358 } 2359 if (len1 > 0) 2360 kmem_free(cs_status, len1); 2361 nbanks = len / sizeof (struct mc_addr_spec); 2362 2363 if (nbanks > 0) 2364 nbytes /= nbanks; 2365 else { 2366 /* No need to free macaddr because len must be 0 */ 2367 mcp->mc_status |= MC_MEMORYLESS; 2368 return (DDI_SUCCESS); 2369 } 2370 2371 for (i = 0; i < BANKNUM_PER_SB; i++) { 2372 mcp->mc_scf_retry[i] = 0; 2373 mcp->mc_period[i] = 0; 2374 mcp->mc_speedup_period[i] = 0; 2375 } 2376 2377 /* 2378 * Get the memory size here. Let it be B (bytes). 2379 * Let T be the time in u.s. to scan 64 bytes. 2380 * If we want to complete 1 round of scanning in P seconds. 2381 * 2382 * B * T * 10^(-6) = P 2383 * --------------- 2384 * 64 2385 * 2386 * T = P * 64 * 10^6 2387 * ------------- 2388 * B 2389 * 2390 * = P * 64 * 10^6 2391 * ------------- 2392 * B 2393 * 2394 * The timing bits are set in PTRL_CNTL[28:26] where 2395 * 2396 * 0 - 1 m.s 2397 * 1 - 512 u.s. 2398 * 10 - 256 u.s. 2399 * 11 - 128 u.s. 2400 * 100 - 64 u.s. 2401 * 101 - 32 u.s. 2402 * 110 - 0 u.s. 2403 * 111 - reserved. 2404 * 2405 * 2406 * a[0] = 110, a[1] = 101, ... 
a[6] = 0 2407 * 2408 * cs-status property is int x 7 2409 * 0 - cs# 2410 * 1 - cs-status 2411 * 2 - cs-avail.hi 2412 * 3 - cs-avail.lo 2413 * 4 - dimm-capa.hi 2414 * 5 - dimm-capa.lo 2415 * 6 - #of dimms 2416 */ 2417 2418 if (nbytes > 0) { 2419 int i; 2420 uint64_t ms; 2421 ms = ((uint64_t)mc_scan_period * 64 * 1000000)/nbytes; 2422 mcp->mc_speed = mc_scan_speeds[MC_MAX_SPEEDS - 1].mc_speeds; 2423 for (i = 0; i < MC_MAX_SPEEDS - 1; i++) { 2424 if (ms < mc_scan_speeds[i + 1].mc_period) { 2425 mcp->mc_speed = mc_scan_speeds[i].mc_speeds; 2426 break; 2427 } 2428 } 2429 } else 2430 mcp->mc_speed = 0; 2431 2432 2433 for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) { 2434 struct mc_bank *bankp; 2435 uint32_t reg; 2436 2437 /* 2438 * setup bank 2439 */ 2440 bk = macaddr[i].bank; 2441 bankp = &(mcp->mc_bank[bk]); 2442 bankp->mcb_status = BANK_INSTALLED; 2443 bankp->mcb_reg_base = REGS_PA(macaddr, i); 2444 2445 reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk)); 2446 bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS); 2447 2448 /* 2449 * check if mirror mode 2450 */ 2451 mirr = LD_MAC_REG(MAC_MIRR(mcp, bk)); 2452 2453 if (mirr & MAC_MIRR_MIRROR_MODE) { 2454 MC_LOG("Mirror -> /LSB%d/B%d\n", 2455 mcp->mc_board_num, bk); 2456 bankp->mcb_status |= BANK_MIRROR_MODE; 2457 /* 2458 * The following bit is only used for 2459 * error injection. 
We should clear it 2460 */ 2461 if (mirr & MAC_MIRR_BANK_EXCLUSIVE) 2462 ST_MAC_REG(MAC_MIRR(mcp, bk), 2463 0); 2464 } 2465 2466 /* 2467 * restart if not mirror mode or the other bank 2468 * of the mirror is not running 2469 */ 2470 if (!(mirr & MAC_MIRR_MIRROR_MODE) || 2471 !(mcp->mc_bank[bk^1].mcb_status & 2472 BANK_PTRL_RUNNING)) { 2473 MC_LOG("Starting up /LSB%d/B%d\n", 2474 mcp->mc_board_num, bk); 2475 get_ptrl_start_address(mcp, bk, &rsaddr.mi_restartaddr); 2476 rsaddr.mi_valid = 0; 2477 rsaddr.mi_injectrestart = 0; 2478 restart_patrol(mcp, bk, &rsaddr); 2479 } else { 2480 MC_LOG("Not starting up /LSB%d/B%d\n", 2481 mcp->mc_board_num, bk); 2482 } 2483 bankp->mcb_status |= BANK_PTRL_RUNNING; 2484 } 2485 if (len > 0) 2486 kmem_free(macaddr, len); 2487 2488 mcp->mc_dimm_list = mc_get_dimm_list(mcp); 2489 2490 /* 2491 * set interval in HZ. 2492 */ 2493 mcp->mc_last_error = 0; 2494 2495 /* restart memory patrol checking */ 2496 mcp->mc_status |= MC_POLL_RUNNING; 2497 2498 return (DDI_SUCCESS); 2499 } 2500 2501 int 2502 mc_board_del(mc_opl_t *mcp) 2503 { 2504 int i; 2505 scf_log_t *p; 2506 2507 /* 2508 * cleanup mac state 2509 */ 2510 mutex_enter(&mcp->mc_lock); 2511 if (mcp->mc_status & MC_MEMORYLESS) { 2512 mutex_exit(&mcp->mc_lock); 2513 mutex_destroy(&mcp->mc_lock); 2514 return (DDI_SUCCESS); 2515 } 2516 for (i = 0; i < BANKNUM_PER_SB; i++) { 2517 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2518 mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED; 2519 } 2520 } 2521 2522 /* stop memory patrol checking */ 2523 mcp->mc_status &= ~MC_POLL_RUNNING; 2524 2525 /* just throw away all the scf logs */ 2526 for (i = 0; i < BANKNUM_PER_SB; i++) { 2527 while ((p = mcp->mc_scf_log[i]) != NULL) { 2528 mcp->mc_scf_log[i] = p->sl_next; 2529 mcp->mc_scf_total[i]--; 2530 kmem_free(p, sizeof (scf_log_t)); 2531 } 2532 } 2533 2534 if (mcp->mlist) 2535 mc_memlist_delete(mcp->mlist); 2536 2537 if (mcp->mc_dimm_list) 2538 mc_free_dimm_list(mcp->mc_dimm_list); 2539 2540 
mutex_exit(&mcp->mc_lock); 2541 2542 mutex_destroy(&mcp->mc_lock); 2543 return (DDI_SUCCESS); 2544 } 2545 2546 int 2547 mc_suspend(mc_opl_t *mcp, uint32_t flag) 2548 { 2549 /* stop memory patrol checking */ 2550 mutex_enter(&mcp->mc_lock); 2551 if (mcp->mc_status & MC_MEMORYLESS) { 2552 mutex_exit(&mcp->mc_lock); 2553 return (DDI_SUCCESS); 2554 } 2555 2556 mcp->mc_status &= ~MC_POLL_RUNNING; 2557 2558 mcp->mc_status |= flag; 2559 mutex_exit(&mcp->mc_lock); 2560 2561 return (DDI_SUCCESS); 2562 } 2563 2564 /* caller must clear the SUSPEND bits or this will do nothing */ 2565 2566 int 2567 mc_resume(mc_opl_t *mcp, uint32_t flag) 2568 { 2569 int i; 2570 uint64_t basepa; 2571 2572 mutex_enter(&mcp->mc_lock); 2573 if (mcp->mc_status & MC_MEMORYLESS) { 2574 mutex_exit(&mcp->mc_lock); 2575 return (DDI_SUCCESS); 2576 } 2577 basepa = mcp->mc_start_address; 2578 if (get_base_address(mcp) == DDI_FAILURE) { 2579 mutex_exit(&mcp->mc_lock); 2580 return (DDI_FAILURE); 2581 } 2582 2583 if (basepa != mcp->mc_start_address) { 2584 if (mcp->mlist) 2585 mc_memlist_delete(mcp->mlist); 2586 mcp->mlist = NULL; 2587 mc_get_mlist(mcp); 2588 } 2589 2590 mcp->mc_status &= ~flag; 2591 2592 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 2593 mutex_exit(&mcp->mc_lock); 2594 return (DDI_SUCCESS); 2595 } 2596 2597 if (!(mcp->mc_status & MC_POLL_RUNNING)) { 2598 /* restart memory patrol checking */ 2599 mcp->mc_status |= MC_POLL_RUNNING; 2600 for (i = 0; i < BANKNUM_PER_SB; i++) { 2601 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2602 restart_patrol(mcp, i, NULL); 2603 } 2604 } 2605 } 2606 mutex_exit(&mcp->mc_lock); 2607 2608 return (DDI_SUCCESS); 2609 } 2610 2611 static mc_opl_t * 2612 mc_pa_to_mcp(uint64_t pa) 2613 { 2614 mc_opl_t *mcp; 2615 int i; 2616 2617 ASSERT(MUTEX_HELD(&mcmutex)); 2618 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2619 if ((mcp = mc_instances[i]) == NULL) 2620 continue; 2621 /* if mac patrol is suspended, we cannot rely on it */ 2622 if (!(mcp->mc_status 
& MC_POLL_RUNNING) || 2623 (mcp->mc_status & MC_SOFT_SUSPENDED)) 2624 continue; 2625 if (mc_rangecheck_pa(mcp, pa)) { 2626 return (mcp); 2627 } 2628 } 2629 return (NULL); 2630 } 2631 2632 /* 2633 * Get Physical Board number from Logical one. 2634 */ 2635 static int 2636 mc_opl_get_physical_board(int sb) 2637 { 2638 if (&opl_get_physical_board) { 2639 return (opl_get_physical_board(sb)); 2640 } 2641 2642 cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n"); 2643 return (-1); 2644 } 2645 2646 /* ARGSUSED */ 2647 int 2648 mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen, 2649 int *lenp) 2650 { 2651 int i; 2652 int sb; 2653 int bank; 2654 mc_opl_t *mcp; 2655 char memb_num; 2656 2657 mutex_enter(&mcmutex); 2658 2659 if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) || 2660 (!pa_is_valid(mcp, flt_addr))) { 2661 mutex_exit(&mcmutex); 2662 if (snprintf(buf, buflen, "UNKNOWN") >= buflen) { 2663 return (ENOSPC); 2664 } else { 2665 if (lenp) 2666 *lenp = strlen(buf); 2667 } 2668 return (0); 2669 } 2670 2671 bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address); 2672 sb = mc_opl_get_physical_board(mcp->mc_board_num); 2673 2674 if (sb == -1) { 2675 mutex_exit(&mcmutex); 2676 return (ENXIO); 2677 } 2678 2679 if (plat_model == MODEL_DC) { 2680 i = BD_BK_SLOT_TO_INDEX(0, bank, 0); 2681 snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s MEM%s MEM%s", 2682 model_names[plat_model].unit_name, sb, 2683 mc_dc_dimm_unum_table[i], mc_dc_dimm_unum_table[i + 1], 2684 mc_dc_dimm_unum_table[i + 2], mc_dc_dimm_unum_table[i + 3]); 2685 } else { 2686 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 2687 memb_num = mc_ff_dimm_unum_table[i][0]; 2688 snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s MEM%s MEM%s", 2689 model_names[plat_model].unit_name, 2690 model_names[plat_model].mem_name, memb_num, 2691 &mc_ff_dimm_unum_table[i][1], 2692 2693 &mc_ff_dimm_unum_table[i + 1][1], 2694 &mc_ff_dimm_unum_table[i + 2][1], 2695 &mc_ff_dimm_unum_table[i + 3][1]); 2696 } 2697 if (lenp) { 2698 *lenp = 
strlen(buf); 2699 } 2700 mutex_exit(&mcmutex); 2701 return (0); 2702 } 2703 2704 int 2705 opl_mc_suspend(void) 2706 { 2707 mc_opl_t *mcp; 2708 int i; 2709 2710 mutex_enter(&mcmutex); 2711 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2712 if ((mcp = mc_instances[i]) == NULL) 2713 continue; 2714 mc_suspend(mcp, MC_SOFT_SUSPENDED); 2715 } 2716 mutex_exit(&mcmutex); 2717 2718 return (0); 2719 } 2720 2721 int 2722 opl_mc_resume(void) 2723 { 2724 mc_opl_t *mcp; 2725 int i; 2726 2727 mutex_enter(&mcmutex); 2728 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2729 if ((mcp = mc_instances[i]) == NULL) 2730 continue; 2731 mc_resume(mcp, MC_SOFT_SUSPENDED); 2732 } 2733 mutex_exit(&mcmutex); 2734 2735 return (0); 2736 } 2737 static void 2738 insert_mcp(mc_opl_t *mcp) 2739 { 2740 mutex_enter(&mcmutex); 2741 if (mc_instances[mcp->mc_board_num] != NULL) { 2742 MC_LOG("mc-opl instance for board# %d already exists\n", 2743 mcp->mc_board_num); 2744 } 2745 mc_instances[mcp->mc_board_num] = mcp; 2746 mutex_exit(&mcmutex); 2747 } 2748 2749 static void 2750 delete_mcp(mc_opl_t *mcp) 2751 { 2752 mutex_enter(&mcmutex); 2753 mc_instances[mcp->mc_board_num] = 0; 2754 mutex_exit(&mcmutex); 2755 } 2756 2757 /* Error injection interface */ 2758 2759 static void 2760 mc_lock_va(uint64_t pa, caddr_t new_va) 2761 { 2762 tte_t tte; 2763 2764 vtag_flushpage(new_va, (uint64_t)ksfmmup); 2765 sfmmu_memtte(&tte, pa >> PAGESHIFT, 2766 PROC_DATA|HAT_NOSYNC, TTE8K); 2767 tte.tte_intlo |= TTE_LCK_INT; 2768 sfmmu_dtlb_ld_kva(new_va, &tte); 2769 } 2770 2771 static void 2772 mc_unlock_va(caddr_t va) 2773 { 2774 vtag_flushpage(va, (uint64_t)ksfmmup); 2775 } 2776 2777 /* ARGSUSED */ 2778 int 2779 mc_inject_error(int error_type, uint64_t pa, uint32_t flags) 2780 { 2781 mc_opl_t *mcp; 2782 int bank; 2783 uint32_t dimm_addr; 2784 uint32_t cntl; 2785 mc_rsaddr_info_t rsaddr; 2786 uint32_t data, stat; 2787 int both_sides = 0; 2788 uint64_t pa0; 2789 int extra_injection_needed = 0; 2790 extern void cpu_flush_ecache(void); 2791 2792 
MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags); 2793 2794 mutex_enter(&mcmutex); 2795 if ((mcp = mc_pa_to_mcp(pa)) == NULL) { 2796 mutex_exit(&mcmutex); 2797 MC_LOG("mc_inject_error: invalid pa\n"); 2798 return (ENOTSUP); 2799 } 2800 2801 mutex_enter(&mcp->mc_lock); 2802 mutex_exit(&mcmutex); 2803 2804 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 2805 mutex_exit(&mcp->mc_lock); 2806 MC_LOG("mc-opl has been suspended. No error injection.\n"); 2807 return (EBUSY); 2808 } 2809 2810 /* convert pa to offset within the board */ 2811 MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address); 2812 2813 if (!pa_is_valid(mcp, pa)) { 2814 mutex_exit(&mcp->mc_lock); 2815 return (EINVAL); 2816 } 2817 2818 pa0 = pa - mcp->mc_start_address; 2819 2820 bank = pa_to_bank(mcp, pa0); 2821 2822 if (flags & MC_INJECT_FLAG_OTHER) 2823 bank = bank ^ 1; 2824 2825 if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) { 2826 mutex_exit(&mcp->mc_lock); 2827 MC_LOG("Not mirror mode\n"); 2828 return (EINVAL); 2829 } 2830 2831 dimm_addr = pa_to_dimm(mcp, pa0); 2832 2833 MC_LOG("injecting error to /LSB%d/B%d/%x\n", 2834 mcp->mc_board_num, bank, dimm_addr); 2835 2836 2837 switch (error_type) { 2838 case MC_INJECT_INTERMITTENT_MCE: 2839 case MC_INJECT_PERMANENT_MCE: 2840 case MC_INJECT_MUE: 2841 both_sides = 1; 2842 } 2843 2844 if (flags & MC_INJECT_FLAG_RESET) 2845 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0); 2846 2847 ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK); 2848 2849 if (both_sides) { 2850 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0); 2851 ST_MAC_REG(MAC_EG_ADD(mcp, bank^1), 2852 dimm_addr & MAC_EG_ADD_MASK); 2853 } 2854 2855 switch (error_type) { 2856 case MC_INJECT_SUE: 2857 extra_injection_needed = 1; 2858 /*FALLTHROUGH*/ 2859 case MC_INJECT_UE: 2860 case MC_INJECT_MUE: 2861 if (flags & MC_INJECT_FLAG_PATH) { 2862 cntl = MAC_EG_ADD_FIX 2863 |MAC_EG_FORCE_READ00|MAC_EG_FORCE_READ16 2864 |MAC_EG_RDERR_ONCE; 2865 } else { 2866 cntl = 
MAC_EG_ADD_FIX|MAC_EG_FORCE_DERR00 2867 |MAC_EG_FORCE_DERR16|MAC_EG_DERR_ONCE; 2868 } 2869 flags |= MC_INJECT_FLAG_ST; 2870 break; 2871 case MC_INJECT_INTERMITTENT_CE: 2872 case MC_INJECT_INTERMITTENT_MCE: 2873 if (flags & MC_INJECT_FLAG_PATH) { 2874 cntl = MAC_EG_ADD_FIX 2875 |MAC_EG_FORCE_READ00 2876 |MAC_EG_RDERR_ONCE; 2877 } else { 2878 cntl = MAC_EG_ADD_FIX 2879 |MAC_EG_FORCE_DERR16 2880 |MAC_EG_DERR_ONCE; 2881 } 2882 extra_injection_needed = 1; 2883 flags |= MC_INJECT_FLAG_ST; 2884 break; 2885 case MC_INJECT_PERMANENT_CE: 2886 case MC_INJECT_PERMANENT_MCE: 2887 if (flags & MC_INJECT_FLAG_PATH) { 2888 cntl = MAC_EG_ADD_FIX 2889 |MAC_EG_FORCE_READ00 2890 |MAC_EG_RDERR_ALWAYS; 2891 } else { 2892 cntl = MAC_EG_ADD_FIX 2893 |MAC_EG_FORCE_DERR16 2894 |MAC_EG_DERR_ALWAYS; 2895 } 2896 flags |= MC_INJECT_FLAG_ST; 2897 break; 2898 case MC_INJECT_CMPE: 2899 data = 0xabcdefab; 2900 stphys(pa, data); 2901 cpu_flush_ecache(); 2902 MC_LOG("CMPE: writing data %x to %lx\n", data, pa); 2903 ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE); 2904 stphys(pa, data ^ 0xffffffff); 2905 membar_sync(); 2906 cpu_flush_ecache(); 2907 ST_MAC_REG(MAC_MIRR(mcp, bank), 0); 2908 MC_LOG("CMPE: write new data %xto %lx\n", data, pa); 2909 cntl = 0; 2910 break; 2911 case MC_INJECT_NOP: 2912 cntl = 0; 2913 break; 2914 default: 2915 MC_LOG("mc_inject_error: invalid option\n"); 2916 cntl = 0; 2917 } 2918 2919 if (cntl) { 2920 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK); 2921 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 2922 2923 if (both_sides) { 2924 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 2925 MAC_EG_SETUP_MASK); 2926 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 2927 } 2928 } 2929 2930 /* 2931 * For all injection cases except compare error, we 2932 * must write to the PA to trigger the error. 
2933 */ 2934 2935 if (flags & MC_INJECT_FLAG_ST) { 2936 data = 0xf0e0d0c0; 2937 MC_LOG("Writing %x to %lx\n", data, pa); 2938 stphys(pa, data); 2939 cpu_flush_ecache(); 2940 } 2941 2942 2943 if (flags & MC_INJECT_FLAG_LD) { 2944 if (flags & MC_INJECT_FLAG_PREFETCH) { 2945 /* 2946 * Use strong prefetch operation to 2947 * inject MI errors. 2948 */ 2949 page_t *pp; 2950 extern void mc_prefetch(caddr_t); 2951 2952 MC_LOG("prefetch\n"); 2953 2954 pp = page_numtopp_nolock(pa >> PAGESHIFT); 2955 if (pp != NULL) { 2956 caddr_t va, va1; 2957 2958 va = ppmapin(pp, PROT_READ|PROT_WRITE, 2959 (caddr_t)-1); 2960 kpreempt_disable(); 2961 mc_lock_va((uint64_t)pa, va); 2962 va1 = va + (pa & (PAGESIZE - 1)); 2963 mc_prefetch(va1); 2964 mc_unlock_va(va); 2965 kpreempt_enable(); 2966 ppmapout(va); 2967 2968 /* 2969 * For MI errors, we need one extra 2970 * injection for HW patrol to stop. 2971 */ 2972 extra_injection_needed = 1; 2973 } else { 2974 cmn_err(CE_WARN, "Cannot find page structure" 2975 " for PA %lx\n", pa); 2976 } 2977 } else { 2978 MC_LOG("Reading from %lx\n", pa); 2979 data = ldphys(pa); 2980 MC_LOG("data = %x\n", data); 2981 } 2982 2983 if (extra_injection_needed) { 2984 /* 2985 * These are the injection cases where the 2986 * requested injected errors will not cause the HW 2987 * patrol to stop. For these cases, we need to inject 2988 * an extra 'real' PTRL error to force the 2989 * HW patrol to stop so that we can report the 2990 * errors injected. Note that we cannot read 2991 * and report error status while the HW patrol 2992 * is running. 
2993 */ 2994 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 2995 cntl & MAC_EG_SETUP_MASK); 2996 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 2997 2998 if (both_sides) { 2999 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 3000 MAC_EG_SETUP_MASK); 3001 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 3002 } 3003 data = 0xf0e0d0c0; 3004 MC_LOG("Writing %x to %lx\n", data, pa); 3005 stphys(pa, data); 3006 cpu_flush_ecache(); 3007 } 3008 } 3009 3010 if (flags & MC_INJECT_FLAG_RESTART) { 3011 MC_LOG("Restart patrol\n"); 3012 rsaddr.mi_restartaddr.ma_bd = mcp->mc_board_num; 3013 rsaddr.mi_restartaddr.ma_bank = bank; 3014 rsaddr.mi_restartaddr.ma_dimm_addr = dimm_addr; 3015 rsaddr.mi_valid = 1; 3016 rsaddr.mi_injectrestart = 1; 3017 restart_patrol(mcp, bank, &rsaddr); 3018 } 3019 3020 if (flags & MC_INJECT_FLAG_POLL) { 3021 int running; 3022 int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank; 3023 3024 MC_LOG("Poll patrol error\n"); 3025 stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank)); 3026 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 3027 running = cntl & MAC_CNTL_PTRL_START; 3028 3029 if (!running && 3030 (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS))) { 3031 /* 3032 * HW patrol stopped and we have errors to 3033 * report. Do it. 3034 */ 3035 mcp->mc_speedup_period[ebank] = 0; 3036 rsaddr.mi_valid = 0; 3037 rsaddr.mi_injectrestart = 0; 3038 if (IS_MIRROR(mcp, bank)) { 3039 mc_error_handler_mir(mcp, bank, &rsaddr); 3040 } else { 3041 mc_error_handler(mcp, bank, &rsaddr); 3042 } 3043 3044 restart_patrol(mcp, bank, &rsaddr); 3045 } else { 3046 /* 3047 * We are expecting to report injected 3048 * errors but the HW patrol is still running. 
3049 * Speed up the scanning 3050 */ 3051 mcp->mc_speedup_period[ebank] = 2; 3052 MAC_CMD(mcp, bank, 0); 3053 restart_patrol(mcp, bank, NULL); 3054 } 3055 } 3056 3057 mutex_exit(&mcp->mc_lock); 3058 return (0); 3059 } 3060 3061 void 3062 mc_stphysio(uint64_t pa, uint32_t data) 3063 { 3064 MC_LOG("0x%x -> pa(%lx)\n", data, pa); 3065 stphysio(pa, data); 3066 3067 /* force the above write to be processed by mac patrol */ 3068 data = ldphysio(pa); 3069 MC_LOG("pa(%lx) = 0x%x\n", pa, data); 3070 } 3071 3072 uint32_t 3073 mc_ldphysio(uint64_t pa) 3074 { 3075 uint32_t rv; 3076 3077 rv = ldphysio(pa); 3078 MC_LOG("pa(%lx) = 0x%x\n", pa, rv); 3079 return (rv); 3080 } 3081 3082 #define isdigit(ch) ((ch) >= '0' && (ch) <= '9') 3083 3084 /* 3085 * parse_unum_memory -- extract the board number and the DIMM name from 3086 * the unum. 3087 * 3088 * Return 0 for success and non-zero for a failure. 3089 */ 3090 int 3091 parse_unum_memory(char *unum, int *board, char *dname) 3092 { 3093 char *c; 3094 char x, y, z; 3095 3096 if ((c = strstr(unum, "CMU")) != NULL) { 3097 /* DC Model */ 3098 c += 3; 3099 *board = (uint8_t)stoi(&c); 3100 if ((c = strstr(c, "MEM")) == NULL) { 3101 return (1); 3102 } 3103 c += 3; 3104 if (strlen(c) < 3) { 3105 return (2); 3106 } 3107 if ((!isdigit(c[0])) || (!(isdigit(c[1]))) || 3108 ((c[2] != 'A') && (c[2] != 'B'))) { 3109 return (3); 3110 } 3111 x = c[0]; 3112 y = c[1]; 3113 z = c[2]; 3114 } else if ((c = strstr(unum, "MBU_")) != NULL) { 3115 /* FF1/FF2 Model */ 3116 c += 4; 3117 if ((c[0] != 'A') && (c[0] != 'B')) { 3118 return (4); 3119 } 3120 if ((c = strstr(c, "MEMB")) == NULL) { 3121 return (5); 3122 } 3123 c += 4; 3124 3125 x = c[0]; 3126 *board = ((uint8_t)stoi(&c)) / 4; 3127 if ((c = strstr(c, "MEM")) == NULL) { 3128 return (6); 3129 } 3130 c += 3; 3131 if (strlen(c) < 2) { 3132 return (7); 3133 } 3134 if ((!isdigit(c[0])) || ((c[1] != 'A') && (c[1] != 'B'))) { 3135 return (8); 3136 } 3137 y = c[0]; 3138 z = c[1]; 3139 } else { 3140 return (9); 
3141 } 3142 if (*board < 0) { 3143 return (10); 3144 } 3145 dname[0] = x; 3146 dname[1] = y; 3147 dname[2] = z; 3148 dname[3] = '\0'; 3149 return (0); 3150 } 3151 3152 /* 3153 * mc_get_mem_sid_dimm -- Get the serial-ID for a given board and 3154 * the DIMM name. 3155 */ 3156 int 3157 mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf, 3158 int buflen, int *lenp) 3159 { 3160 int ret = ENODEV; 3161 mc_dimm_info_t *d = NULL; 3162 3163 if ((d = mcp->mc_dimm_list) == NULL) 3164 return (ENOTSUP); 3165 3166 for (; d != NULL; d = d->md_next) { 3167 if (strcmp(d->md_dimmname, dname) == 0) { 3168 break; 3169 } 3170 } 3171 if (d != NULL) { 3172 *lenp = strlen(d->md_serial) + strlen(d->md_partnum); 3173 if (buflen <= *lenp) { 3174 cmn_err(CE_WARN, "mc_get_mem_sid_dimm: " 3175 "buflen is smaller than %d\n", *lenp); 3176 ret = ENOSPC; 3177 } else { 3178 snprintf(buf, buflen, "%s:%s", 3179 d->md_serial, d->md_partnum); 3180 ret = 0; 3181 } 3182 } 3183 MC_LOG("mc_get_mem_sid_dimm: Ret=%d Name=%s Serial-ID=%s\n", 3184 ret, dname, (ret == 0) ? buf : ""); 3185 return (ret); 3186 } 3187 3188 int 3189 mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, 3190 int bank, uint32_t mf_type, uint32_t d_slot) 3191 { 3192 int sb; 3193 int lenp = buflen; 3194 int id; 3195 int ret; 3196 char *dimmnm; 3197 3198 if ((sb = mc_opl_get_physical_board(lsb)) < 0) { 3199 return (ENODEV); 3200 } 3201 3202 if (mf_type == FLT_TYPE_PERMANENT_CE) { 3203 if (plat_model == MODEL_DC) { 3204 id = BD_BK_SLOT_TO_INDEX(0, bank, d_slot); 3205 } else { 3206 id = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 3207 } 3208 dimmnm = mc_dc_dimm_unum_table[id]; 3209 if ((ret = mc_get_mem_sid_dimm(mcp, dimmnm, buf, buflen, 3210 &lenp)) != 0) { 3211 return (ret); 3212 } 3213 } else { 3214 return (1); 3215 } 3216 3217 return (0); 3218 } 3219 3220 /* 3221 * mc_get_mem_sid -- get the DIMM serial-ID corresponding to the unum. 
3222 */ 3223 int 3224 mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 3225 { 3226 int i; 3227 int ret = ENODEV; 3228 int board; 3229 char dname[MCOPL_MAX_DIMMNAME + 1]; 3230 mc_opl_t *mcp; 3231 3232 MC_LOG("mc_get_mem_sid: unum=%s buflen=%d\n", unum, buflen); 3233 if ((ret = parse_unum_memory(unum, &board, dname)) != 0) { 3234 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3235 unum, ret); 3236 return (EINVAL); 3237 } 3238 3239 if (board < 0) { 3240 MC_LOG("mc_get_mem_sid: Invalid board=%d dimm=%s\n", 3241 board, dname); 3242 return (EINVAL); 3243 } 3244 3245 mutex_enter(&mcmutex); 3246 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3247 if ((mcp = mc_instances[i]) == NULL) 3248 continue; 3249 mutex_enter(&mcp->mc_lock); 3250 if (mcp->mc_board_num == board) { 3251 ret = mc_get_mem_sid_dimm(mcp, dname, buf, 3252 buflen, lenp); 3253 mutex_exit(&mcp->mc_lock); 3254 break; 3255 } 3256 mutex_exit(&mcp->mc_lock); 3257 } 3258 mutex_exit(&mcmutex); 3259 return (ret); 3260 } 3261 3262 /* 3263 * mc_get_mem_offset -- get the offset in a DIMM for a given physical address. 3264 */ 3265 int 3266 mc_get_mem_offset(uint64_t paddr, uint64_t *offp) 3267 { 3268 int i; 3269 int ret = ENODEV; 3270 mc_addr_t maddr; 3271 mc_opl_t *mcp; 3272 3273 mutex_enter(&mcmutex); 3274 for (i = 0; ((i < OPL_MAX_BOARDS) && (ret != 0)); i++) { 3275 if ((mcp = mc_instances[i]) == NULL) 3276 continue; 3277 mutex_enter(&mcp->mc_lock); 3278 if (!pa_is_valid(mcp, paddr)) { 3279 mutex_exit(&mcp->mc_lock); 3280 continue; 3281 } 3282 if (pa_to_maddr(mcp, paddr, &maddr) == 0) { 3283 *offp = maddr.ma_dimm_addr; 3284 ret = 0; 3285 } 3286 mutex_exit(&mcp->mc_lock); 3287 } 3288 mutex_exit(&mcmutex); 3289 MC_LOG("mc_get_mem_offset: Ret=%d paddr=0x%lx offset=0x%lx\n", 3290 ret, paddr, *offp); 3291 return (ret); 3292 } 3293 3294 /* 3295 * dname_to_bankslot - Get the bank and slot number from the DIMM name. 
3296 */ 3297 int 3298 dname_to_bankslot(char *dname, int *bank, int *slot) 3299 { 3300 int i; 3301 int tsz; 3302 char **tbl; 3303 3304 if (plat_model == MODEL_DC) { /* DC */ 3305 tbl = mc_dc_dimm_unum_table; 3306 tsz = OPL_MAX_DIMMS; 3307 } else { 3308 tbl = mc_ff_dimm_unum_table; 3309 tsz = 2 * OPL_MAX_DIMMS; 3310 } 3311 3312 for (i = 0; i < tsz; i++) { 3313 if (strcmp(dname, tbl[i]) == 0) { 3314 break; 3315 } 3316 } 3317 if (i == tsz) { 3318 return (1); 3319 } 3320 *bank = INDEX_TO_BANK(i); 3321 *slot = INDEX_TO_SLOT(i); 3322 return (0); 3323 } 3324 3325 /* 3326 * mc_get_mem_addr -- get the physical address of a DIMM corresponding 3327 * to the unum and sid. 3328 */ 3329 int 3330 mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr) 3331 { 3332 int board; 3333 int bank; 3334 int slot; 3335 int i; 3336 int ret = ENODEV; 3337 char dname[MCOPL_MAX_DIMMNAME + 1]; 3338 mc_addr_t maddr; 3339 mc_opl_t *mcp; 3340 3341 MC_LOG("mc_get_mem_addr: unum=%s sid=%s offset=0x%lx\n", 3342 unum, sid, offset); 3343 if (parse_unum_memory(unum, &board, dname) != 0) { 3344 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3345 unum, ret); 3346 return (EINVAL); 3347 } 3348 3349 if (board < 0) { 3350 MC_LOG("mc_get_mem_addr: Invalid board=%d dimm=%s\n", 3351 board, dname); 3352 return (EINVAL); 3353 } 3354 3355 mutex_enter(&mcmutex); 3356 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3357 if ((mcp = mc_instances[i]) == NULL) 3358 continue; 3359 mutex_enter(&mcp->mc_lock); 3360 if (mcp->mc_board_num != board) { 3361 mutex_exit(&mcp->mc_lock); 3362 continue; 3363 } 3364 3365 ret = dname_to_bankslot(dname, &bank, &slot); 3366 MC_LOG("mc_get_mem_addr: bank=%d slot=%d\n", bank, slot); 3367 if (ret != 0) { 3368 MC_LOG("mc_get_mem_addr: dname_to_bankslot failed\n"); 3369 ret = ENODEV; 3370 } else { 3371 maddr.ma_bd = board; 3372 maddr.ma_bank = bank; 3373 maddr.ma_dimm_addr = offset; 3374 ret = mcaddr_to_pa(mcp, &maddr, paddr); 3375 if (ret != 0) { 3376 
MC_LOG("mc_get_mem_addr: " 3377 "mcaddr_to_pa failed\n"); 3378 ret = ENODEV; 3379 } 3380 } 3381 mutex_exit(&mcp->mc_lock); 3382 } 3383 mutex_exit(&mcmutex); 3384 MC_LOG("mc_get_mem_addr: Ret=%d, Paddr=0x%lx\n", ret, *paddr); 3385 return (ret); 3386 } 3387 3388 static void 3389 mc_free_dimm_list(mc_dimm_info_t *d) 3390 { 3391 mc_dimm_info_t *next; 3392 3393 while (d != NULL) { 3394 next = d->md_next; 3395 kmem_free(d, sizeof (mc_dimm_info_t)); 3396 d = next; 3397 } 3398 } 3399 3400 /* 3401 * mc_get_dimm_list -- get the list of dimms with serial-id info 3402 * from the SP. 3403 */ 3404 mc_dimm_info_t * 3405 mc_get_dimm_list(mc_opl_t *mcp) 3406 { 3407 uint32_t bufsz; 3408 uint32_t maxbufsz; 3409 int ret; 3410 int sexp; 3411 board_dimm_info_t *bd_dimmp; 3412 mc_dimm_info_t *dimm_list = NULL; 3413 3414 maxbufsz = bufsz = sizeof (board_dimm_info_t) + 3415 ((MCOPL_MAX_DIMMNAME + MCOPL_MAX_SERIAL + 3416 MCOPL_MAX_PARTNUM) * OPL_MAX_DIMMS); 3417 3418 bd_dimmp = (board_dimm_info_t *)kmem_alloc(bufsz, KM_SLEEP); 3419 ret = scf_get_dimminfo(mcp->mc_board_num, (void *)bd_dimmp, &bufsz); 3420 3421 MC_LOG("mc_get_dimm_list: scf_service_getinfo returned=%d\n", ret); 3422 if (ret == 0) { 3423 sexp = sizeof (board_dimm_info_t) + 3424 ((bd_dimmp->bd_dnamesz + bd_dimmp->bd_serialsz + 3425 bd_dimmp->bd_partnumsz) * bd_dimmp->bd_numdimms); 3426 3427 if ((bd_dimmp->bd_version == OPL_DIMM_INFO_VERSION) && 3428 (bd_dimmp->bd_dnamesz <= MCOPL_MAX_DIMMNAME) && 3429 (bd_dimmp->bd_serialsz <= MCOPL_MAX_SERIAL) && 3430 (bd_dimmp->bd_partnumsz <= MCOPL_MAX_PARTNUM) && 3431 (sexp <= bufsz)) { 3432 3433 #ifdef DEBUG 3434 if (oplmc_debug) 3435 mc_dump_dimm_info(bd_dimmp); 3436 #endif 3437 dimm_list = mc_prepare_dimmlist(bd_dimmp); 3438 3439 } else { 3440 cmn_err(CE_WARN, "DIMM info version mismatch\n"); 3441 } 3442 } 3443 kmem_free(bd_dimmp, maxbufsz); 3444 MC_LOG("mc_get_dimm_list: dimmlist=0x%p\n", dimm_list); 3445 return (dimm_list); 3446 } 3447 3448 /* 3449 * mc_prepare_dimmlist - Prepare the 
dimm list from the infomation 3450 * recieved from the SP. 3451 */ 3452 mc_dimm_info_t * 3453 mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp) 3454 { 3455 char *dimm_name; 3456 char *serial; 3457 char *part; 3458 int dimm; 3459 int dnamesz = bd_dimmp->bd_dnamesz; 3460 int sersz = bd_dimmp->bd_serialsz; 3461 int partsz = bd_dimmp->bd_partnumsz; 3462 mc_dimm_info_t *dimm_list = NULL; 3463 mc_dimm_info_t *d; 3464 3465 dimm_name = (char *)(bd_dimmp + 1); 3466 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3467 3468 d = (mc_dimm_info_t *)kmem_alloc(sizeof (mc_dimm_info_t), 3469 KM_SLEEP); 3470 snprintf(d->md_dimmname, dnamesz + 1, "%s", dimm_name); 3471 serial = dimm_name + dnamesz; 3472 snprintf(d->md_serial, sersz + 1, "%s", serial); 3473 part = serial + sersz; 3474 snprintf(d->md_partnum, partsz + 1, "%s", part); 3475 3476 d->md_next = dimm_list; 3477 dimm_list = d; 3478 dimm_name = part + partsz; 3479 } 3480 return (dimm_list); 3481 } 3482 3483 #ifdef DEBUG 3484 void 3485 mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz) 3486 { 3487 char dname[MCOPL_MAX_DIMMNAME + 1]; 3488 char serial[MCOPL_MAX_SERIAL + 1]; 3489 char part[ MCOPL_MAX_PARTNUM + 1]; 3490 char *b; 3491 3492 b = buf; 3493 snprintf(dname, dnamesz + 1, "%s", b); 3494 b += dnamesz; 3495 snprintf(serial, serialsz + 1, "%s", b); 3496 b += serialsz; 3497 snprintf(part, partnumsz + 1, "%s", b); 3498 printf("DIMM=%s Serial=%s PartNum=%s\n", dname, serial, part); 3499 } 3500 3501 void 3502 mc_dump_dimm_info(board_dimm_info_t *bd_dimmp) 3503 { 3504 int dimm; 3505 int dnamesz = bd_dimmp->bd_dnamesz; 3506 int sersz = bd_dimmp->bd_serialsz; 3507 int partsz = bd_dimmp->bd_partnumsz; 3508 char *buf; 3509 3510 printf("Version=%d Board=%02d DIMMs=%d NameSize=%d " 3511 "SerialSize=%d PartnumSize=%d\n", bd_dimmp->bd_version, 3512 bd_dimmp->bd_boardnum, bd_dimmp->bd_numdimms, bd_dimmp->bd_dnamesz, 3513 bd_dimmp->bd_serialsz, bd_dimmp->bd_partnumsz); 3514 
printf("======================================================\n"); 3515 3516 buf = (char *)(bd_dimmp + 1); 3517 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3518 mc_dump_dimm(buf, dnamesz, sersz, partsz); 3519 buf += dnamesz + sersz + partsz; 3520 } 3521 printf("======================================================\n"); 3522 } 3523 3524 3525 /* ARGSUSED */ 3526 static int 3527 mc_ioctl_debug(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 3528 int *rvalp) 3529 { 3530 caddr_t buf; 3531 uint64_t pa; 3532 int rv = 0; 3533 int i; 3534 uint32_t flags; 3535 static uint32_t offset = 0; 3536 3537 3538 flags = (cmd >> 4) & 0xfffffff; 3539 3540 cmd &= 0xf; 3541 3542 MC_LOG("mc_ioctl(cmd = %x, flags = %x)\n", cmd, flags); 3543 3544 if (arg != NULL) { 3545 if (ddi_copyin((const void *)arg, (void *)&pa, 3546 sizeof (uint64_t), 0) < 0) { 3547 rv = EFAULT; 3548 return (rv); 3549 } 3550 buf = NULL; 3551 } else { 3552 buf = (caddr_t)kmem_alloc(PAGESIZE, KM_SLEEP); 3553 3554 pa = va_to_pa(buf); 3555 pa += offset; 3556 3557 offset += 64; 3558 if (offset >= PAGESIZE) 3559 offset = 0; 3560 } 3561 3562 switch (cmd) { 3563 case MCI_CE: 3564 mc_inject_error(MC_INJECT_INTERMITTENT_CE, pa, 3565 flags); 3566 break; 3567 case MCI_PERM_CE: 3568 mc_inject_error(MC_INJECT_PERMANENT_CE, pa, 3569 flags); 3570 break; 3571 case MCI_UE: 3572 mc_inject_error(MC_INJECT_UE, pa, 3573 flags); 3574 break; 3575 case MCI_M_CE: 3576 mc_inject_error(MC_INJECT_INTERMITTENT_MCE, pa, 3577 flags); 3578 break; 3579 case MCI_M_PCE: 3580 mc_inject_error(MC_INJECT_PERMANENT_MCE, pa, 3581 flags); 3582 break; 3583 case MCI_M_UE: 3584 mc_inject_error(MC_INJECT_MUE, pa, 3585 flags); 3586 break; 3587 case MCI_CMP: 3588 mc_inject_error(MC_INJECT_CMPE, pa, 3589 flags); 3590 break; 3591 case MCI_NOP: 3592 mc_inject_error(MC_INJECT_NOP, pa, flags); 3593 break; 3594 case MCI_SHOW_ALL: 3595 mc_debug_show_all = 1; 3596 break; 3597 case MCI_SHOW_NONE: 3598 mc_debug_show_all = 0; 3599 break; 3600 case 
MCI_ALLOC: 3601 /* 3602 * just allocate some kernel memory and never free it 3603 * 512 MB seems to be the maximum size supported. 3604 */ 3605 cmn_err(CE_NOTE, "Allocating kmem %d MB\n", flags * 512); 3606 for (i = 0; i < flags; i++) { 3607 buf = kmem_alloc(512 * 1024 * 1024, KM_SLEEP); 3608 cmn_err(CE_NOTE, "kmem buf %llx PA %llx\n", 3609 (u_longlong_t)buf, (u_longlong_t)va_to_pa(buf)); 3610 } 3611 break; 3612 case MCI_SUSPEND: 3613 (void) opl_mc_suspend(); 3614 break; 3615 case MCI_RESUME: 3616 (void) opl_mc_resume(); 3617 break; 3618 default: 3619 rv = ENXIO; 3620 } 3621 return (rv); 3622 } 3623 3624 #endif /* DEBUG */ 3625