1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2006 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/conf.h> 34 #include <sys/modctl.h> 35 #include <sys/stat.h> 36 #include <sys/async.h> 37 #include <sys/machcpuvar.h> 38 #include <sys/machsystm.h> 39 #include <sys/promif.h> 40 #include <sys/ksynch.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/ddifm.h> 44 #include <sys/fm/protocol.h> 45 #include <sys/fm/util.h> 46 #include <sys/kmem.h> 47 #include <sys/fm/io/opl_mc_fm.h> 48 #include <sys/memlist.h> 49 #include <sys/param.h> 50 #include <sys/disp.h> 51 #include <vm/page.h> 52 #include <sys/mc-opl.h> 53 #include <sys/opl.h> 54 #include <sys/opl_dimm.h> 55 #include <sys/scfd/scfostoescf.h> 56 #include <sys/cpu_module.h> 57 #include <vm/seg_kmem.h> 58 #include <sys/vmem.h> 59 #include <vm/hat_sfmmu.h> 60 #include <sys/vmsystm.h> 61 62 /* 63 * Function prototypes 64 */ 65 static int mc_open(dev_t *, int, int, cred_t *); 66 static int mc_close(dev_t, int, int, cred_t *); 67 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 68 static int mc_attach(dev_info_t *, ddi_attach_cmd_t); 69 static int mc_detach(dev_info_t *, ddi_detach_cmd_t); 70 71 static int mc_poll_init(void); 72 static void mc_poll_fini(void); 73 static int mc_board_add(mc_opl_t *mcp); 74 static int mc_board_del(mc_opl_t *mcp); 75 static int mc_suspend(mc_opl_t *mcp, uint32_t flag); 76 static int mc_resume(mc_opl_t *mcp, uint32_t flag); 77 int opl_mc_suspend(void); 78 int opl_mc_resume(void); 79 80 static void insert_mcp(mc_opl_t *mcp); 81 static void delete_mcp(mc_opl_t *mcp); 82 83 static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr); 84 85 static int mc_valid_pa(mc_opl_t *mcp, uint64_t pa); 86 87 int mc_get_mem_unum(int, uint64_t, char *, int, int *); 88 int mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr); 89 int mc_get_mem_offset(uint64_t paddr, uint64_t *offp); 90 int mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp); 91 int mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf, 92 int buflen, int *lenp); 93 mc_dimm_info_t *mc_get_dimm_list(mc_opl_t *mcp); 94 mc_dimm_info_t *mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp); 95 int mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, int bank, 96 uint32_t mf_type, uint32_t d_slot); 97 static void mc_free_dimm_list(mc_dimm_info_t *d); 98 static void mc_get_mlist(mc_opl_t *); 99 static void mc_polling(void); 100 static int mc_opl_get_physical_board(int); 101 102 #ifdef DEBUG 103 static int mc_ioctl_debug(dev_t, int, intptr_t, int, cred_t *, int *); 104 void mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz); 105 void mc_dump_dimm_info(board_dimm_info_t *bd_dimmp); 106 #endif 107 108 #pragma weak opl_get_physical_board 109 extern int opl_get_physical_board(int); 110 extern int plat_max_boards(void); 111 112 /* 113 * Configuration data structures 114 */ 115 static struct cb_ops mc_cb_ops = { 116 mc_open, /* open */ 117 mc_close, /* close */ 118 nulldev, /* strategy */ 119 nulldev, /* print */ 120 nodev, /* dump */ 121 nulldev, /* read */ 122 nulldev, /* write */ 123 mc_ioctl, /* ioctl */ 124 nodev, /* devmap */ 125 nodev, /* mmap */ 126 nodev, /* segmap */ 127 nochpoll, /* poll */ 128 ddi_prop_op, /* cb_prop_op */ 129 0, /* streamtab */ 130 D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flag */ 131 CB_REV, /* rev */ 132 nodev, /* cb_aread */ 133 nodev /* cb_awrite */ 134 }; 135 136 static struct dev_ops mc_ops = { 137 DEVO_REV, /* rev */ 138 0, /* refcnt */ 139 ddi_getinfo_1to1, /* getinfo */ 140 nulldev, /* identify */ 141 nulldev, /* probe */ 142 mc_attach, /* attach */ 143 mc_detach, /* detach */ 144 nulldev, /* reset */ 145 &mc_cb_ops, /* cb_ops */ 146 (struct bus_ops *)0, /* bus_ops */ 147 nulldev /* power */ 148 }; 149 150 /* 151 * Driver globals 152 */ 153 154 static enum { 155 MODEL_FF1 = 0, 156 MODEL_FF2 = 1, 157 MODEL_DC = 2 158 } plat_model = MODEL_DC; /* The default behaviour is DC */ 159 160 static struct plat_model_names { 161 const char *unit_name; 162 const char *mem_name; 163 } model_names[] = { 164 { "MBU_A", "MEMB" }, 165 { "MBU_B", "MEMB" }, 166 { "CMU", "" } 167 }; 168 169 /* 170 * The DIMM Names for DC platform. 171 * The index into this table is made up of (bank, dslot), 172 * Where dslot occupies bits 0-1 and bank occupies 2-4. 173 */ 174 static char *mc_dc_dimm_unum_table[OPL_MAX_DIMMS] = { 175 /* --------CMUnn----------- */ 176 /* --CS0-----|--CS1------ */ 177 /* -H-|--L-- | -H- | -L-- */ 178 "03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */ 179 "13A", "12A", "13B", "12B", /* Bank 1 (MAC 0 bank 1) */ 180 "23A", "22A", "23B", "22B", /* Bank 2 (MAC 1 bank 0) */ 181 "33A", "32A", "33B", "32B", /* Bank 3 (MAC 1 bank 1) */ 182 "01A", "00A", "01B", "00B", /* Bank 4 (MAC 2 bank 0) */ 183 "11A", "10A", "11B", "10B", /* Bank 5 (MAC 2 bank 1) */ 184 "21A", "20A", "21B", "20B", /* Bank 6 (MAC 3 bank 0) */ 185 "31A", "30A", "31B", "30B" /* Bank 7 (MAC 3 bank 1) */ 186 }; 187 188 /* 189 * The DIMM Names for FF1/FF2 platforms. 190 * The index into this table is made up of (board, bank, dslot), 191 * Where dslot occupies bits 0-1, bank occupies 2-4 and 192 * board occupies the bit 5. 193 */ 194 static char *mc_ff_dimm_unum_table[2 * OPL_MAX_DIMMS] = { 195 /* --------CMU0---------- */ 196 /* --CS0-----|--CS1------ */ 197 /* -H-|--L-- | -H- | -L-- */ 198 "03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */ 199 "01A", "00A", "01B", "00B", /* Bank 1 (MAC 0 bank 1) */ 200 "13A", "12A", "13B", "12B", /* Bank 2 (MAC 1 bank 0) */ 201 "11A", "10A", "11B", "10B", /* Bank 3 (MAC 1 bank 1) */ 202 "23A", "22A", "23B", "22B", /* Bank 4 (MAC 2 bank 0) */ 203 "21A", "20A", "21B", "20B", /* Bank 5 (MAC 2 bank 1) */ 204 "33A", "32A", "33B", "32B", /* Bank 6 (MAC 3 bank 0) */ 205 "31A", "30A", "31B", "30B", /* Bank 7 (MAC 3 bank 1) */ 206 /* --------CMU1---------- */ 207 /* --CS0-----|--CS1------ */ 208 /* -H-|--L-- | -H- | -L-- */ 209 "43A", "42A", "43B", "42B", /* Bank 0 (MAC 0 bank 0) */ 210 "41A", "40A", "41B", "40B", /* Bank 1 (MAC 0 bank 1) */ 211 "53A", "52A", "53B", "52B", /* Bank 2 (MAC 1 bank 0) */ 212 "51A", "50A", "51B", "50B", /* Bank 3 (MAC 1 bank 1) */ 213 "63A", "62A", "63B", "62B", /* Bank 4 (MAC 2 bank 0) */ 214 "61A", "60A", "61B", "60B", /* Bank 5 (MAC 2 bank 1) */ 215 "73A", "72A", "73B", "72B", /* Bank 6 (MAC 3 bank 0) */ 216 "71A", "70A", "71B", "70B" /* Bank 7 (MAC 3 bank 1) */ 217 }; 218 219 #define BD_BK_SLOT_TO_INDEX(bd, bk, s) \ 220 (((bd & 0x01) << 5) | ((bk & 0x07) << 2) | (s & 0x03)) 221 222 #define INDEX_TO_BANK(i) (((i) & 0x1C) >> 2) 223 #define INDEX_TO_SLOT(i) ((i) & 0x03) 224 225 /* Isolation unit size is 64 MB */ 226 #define MC_ISOLATION_BSIZE (64 * 1024 * 1024) 227 228 #define MC_MAX_SPEEDS 7 229 230 typedef struct { 231 uint32_t mc_speeds; 232 uint32_t mc_period; 233 } mc_scan_speed_t; 234 235 #define MC_CNTL_SPEED_SHIFT 26 236 237 static mc_scan_speed_t mc_scan_speeds[MC_MAX_SPEEDS] = { 238 {0x6 << MC_CNTL_SPEED_SHIFT, 0}, 239 {0x5 << MC_CNTL_SPEED_SHIFT, 32}, 240 {0x4 << MC_CNTL_SPEED_SHIFT, 64}, 241 {0x3 << MC_CNTL_SPEED_SHIFT, 128}, 242 {0x2 << MC_CNTL_SPEED_SHIFT, 256}, 243 {0x1 << MC_CNTL_SPEED_SHIFT, 512}, 244 {0x0 << MC_CNTL_SPEED_SHIFT, 1024} 245 }; 246 247 static uint32_t mc_max_speed = (0x6 << 26); 248 249 int mc_isolation_bsize = MC_ISOLATION_BSIZE; 250 int mc_patrol_interval_sec = MC_PATROL_INTERVAL_SEC; 251 int mc_max_scf_retry = 16; 252 int mc_max_scf_logs = 64; 253 int mc_max_errlog_processed = BANKNUM_PER_SB*2; 254 int mc_scan_period = 12 * 60 * 60; /* 12 hours period */ 255 int mc_max_rewrite_loop = 100; 256 int mc_rewrite_delay = 10; 257 /* 258 * it takes SCF about 300 m.s. to process a requst. We can bail out 259 * if it is busy. It does not pay to wait for it too long. 260 */ 261 int mc_max_scf_loop = 2; 262 int mc_scf_delay = 100; 263 int mc_pce_dropped = 0; 264 int mc_poll_priority = MINCLSYSPRI; 265 266 267 /* 268 * Mutex heierachy in mc-opl 269 * If both mcmutex and mc_lock must be held, 270 * mcmutex must be acquired first, and then mc_lock. 271 */ 272 273 static kmutex_t mcmutex; 274 mc_opl_t *mc_instances[OPL_MAX_BOARDS]; 275 276 static kmutex_t mc_polling_lock; 277 static kcondvar_t mc_polling_cv; 278 static kcondvar_t mc_poll_exit_cv; 279 static int mc_poll_cmd = 0; 280 static int mc_pollthr_running = 0; 281 int mc_timeout_period = 0; /* this is in m.s. */ 282 void *mc_statep; 283 284 #ifdef DEBUG 285 int oplmc_debug = 0; 286 #endif 287 288 static int mc_debug_show_all = 0; 289 290 extern struct mod_ops mod_driverops; 291 292 static struct modldrv modldrv = { 293 &mod_driverops, /* module type, this one is a driver */ 294 "OPL Memory-controller %I%", /* module name */ 295 &mc_ops, /* driver ops */ 296 }; 297 298 static struct modlinkage modlinkage = { 299 MODREV_1, /* rev */ 300 (void *)&modldrv, 301 NULL 302 }; 303 304 #pragma weak opl_get_mem_unum 305 #pragma weak opl_get_mem_sid 306 #pragma weak opl_get_mem_offset 307 #pragma weak opl_get_mem_addr 308 309 extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *); 310 extern int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp); 311 extern int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp); 312 extern int (*opl_get_mem_addr)(char *unum, char *sid, uint64_t offset, 313 uint64_t *paddr); 314 315 316 /* 317 * pseudo-mc node portid format 318 * 319 * [10] = 0 320 * [9] = 1 321 * [8] = LSB_ID[4] = 0 322 * [7:4] = LSB_ID[3:0] 323 * [3:0] = 0 324 * 325 */ 326 327 /* 328 * These are the module initialization routines. 329 */ 330 int 331 _init(void) 332 { 333 int error; 334 int plen; 335 char model[20]; 336 pnode_t node; 337 338 339 if ((error = ddi_soft_state_init(&mc_statep, 340 sizeof (mc_opl_t), 1)) != 0) 341 return (error); 342 343 if ((error = mc_poll_init()) != 0) { 344 ddi_soft_state_fini(&mc_statep); 345 return (error); 346 } 347 348 mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL); 349 if (&opl_get_mem_unum) 350 opl_get_mem_unum = mc_get_mem_unum; 351 if (&opl_get_mem_sid) 352 opl_get_mem_sid = mc_get_mem_sid; 353 if (&opl_get_mem_offset) 354 opl_get_mem_offset = mc_get_mem_offset; 355 if (&opl_get_mem_addr) 356 opl_get_mem_addr = mc_get_mem_addr; 357 358 node = prom_rootnode(); 359 plen = prom_getproplen(node, "model"); 360 361 if (plen > 0 && plen < sizeof (model)) { 362 (void) prom_getprop(node, "model", model); 363 model[plen] = '\0'; 364 if (strcmp(model, "FF1") == 0) 365 plat_model = MODEL_FF1; 366 else if (strcmp(model, "FF2") == 0) 367 plat_model = MODEL_FF2; 368 else if (strncmp(model, "DC", 2) == 0) 369 plat_model = MODEL_DC; 370 } 371 372 error = mod_install(&modlinkage); 373 if (error != 0) { 374 if (&opl_get_mem_unum) 375 opl_get_mem_unum = NULL; 376 if (&opl_get_mem_sid) 377 opl_get_mem_sid = NULL; 378 if (&opl_get_mem_offset) 379 opl_get_mem_offset = NULL; 380 if (&opl_get_mem_addr) 381 opl_get_mem_addr = NULL; 382 mutex_destroy(&mcmutex); 383 mc_poll_fini(); 384 ddi_soft_state_fini(&mc_statep); 385 } 386 return (error); 387 } 388 389 int 390 _fini(void) 391 { 392 int error; 393 394 if ((error = mod_remove(&modlinkage)) != 0) 395 return (error); 396 397 if (&opl_get_mem_unum) 398 opl_get_mem_unum = NULL; 399 if (&opl_get_mem_sid) 400 opl_get_mem_sid = NULL; 401 if (&opl_get_mem_offset) 402 opl_get_mem_offset = NULL; 403 if (&opl_get_mem_addr) 404 opl_get_mem_addr = NULL; 405 406 mutex_destroy(&mcmutex); 407 mc_poll_fini(); 408 ddi_soft_state_fini(&mc_statep); 409 410 return (0); 411 } 412 413 int 414 _info(struct modinfo *modinfop) 415 { 416 return (mod_info(&modlinkage, modinfop)); 417 } 418 419 static void 420 mc_polling_thread() 421 { 422 mutex_enter(&mc_polling_lock); 423 mc_pollthr_running = 1; 424 while (!(mc_poll_cmd & MC_POLL_EXIT)) { 425 mc_polling(); 426 cv_timedwait(&mc_polling_cv, &mc_polling_lock, 427 ddi_get_lbolt() + mc_timeout_period); 428 } 429 mc_pollthr_running = 0; 430 431 /* 432 * signal if any one is waiting for this thread to exit. 433 */ 434 cv_signal(&mc_poll_exit_cv); 435 mutex_exit(&mc_polling_lock); 436 thread_exit(); 437 /* NOTREACHED */ 438 } 439 440 static int 441 mc_poll_init() 442 { 443 mutex_init(&mc_polling_lock, NULL, MUTEX_DRIVER, NULL); 444 cv_init(&mc_polling_cv, NULL, CV_DRIVER, NULL); 445 cv_init(&mc_poll_exit_cv, NULL, CV_DRIVER, NULL); 446 return (0); 447 } 448 449 static void 450 mc_poll_fini() 451 { 452 mutex_enter(&mc_polling_lock); 453 if (mc_pollthr_running) { 454 mc_poll_cmd = MC_POLL_EXIT; 455 cv_signal(&mc_polling_cv); 456 while (mc_pollthr_running) { 457 cv_wait(&mc_poll_exit_cv, &mc_polling_lock); 458 } 459 } 460 mutex_exit(&mc_polling_lock); 461 mutex_destroy(&mc_polling_lock); 462 cv_destroy(&mc_polling_cv); 463 cv_destroy(&mc_poll_exit_cv); 464 } 465 466 static int 467 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 468 { 469 mc_opl_t *mcp; 470 int instance; 471 int rv; 472 473 /* get the instance of this devi */ 474 instance = ddi_get_instance(devi); 475 476 switch (cmd) { 477 case DDI_ATTACH: 478 break; 479 case DDI_RESUME: 480 mcp = ddi_get_soft_state(mc_statep, instance); 481 rv = mc_resume(mcp, MC_DRIVER_SUSPENDED); 482 return (rv); 483 default: 484 return (DDI_FAILURE); 485 } 486 487 if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS) 488 return (DDI_FAILURE); 489 490 if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) { 491 goto bad; 492 } 493 494 if (mc_timeout_period == 0) { 495 mc_patrol_interval_sec = (int)ddi_getprop(DDI_DEV_T_ANY, devi, 496 DDI_PROP_DONTPASS, "mc-timeout-interval-sec", 497 mc_patrol_interval_sec); 498 mc_timeout_period = drv_usectohz( 499 1000000 * mc_patrol_interval_sec / OPL_MAX_BOARDS); 500 } 501 502 /* set informations in mc state */ 503 mcp->mc_dip = devi; 504 505 if (mc_board_add(mcp)) 506 goto bad; 507 508 insert_mcp(mcp); 509 510 /* 511 * Start the polling thread if it is not running already. 512 */ 513 mutex_enter(&mc_polling_lock); 514 if (!mc_pollthr_running) { 515 (void) thread_create(NULL, 0, (void (*)())mc_polling_thread, 516 NULL, 0, &p0, TS_RUN, mc_poll_priority); 517 } 518 mutex_exit(&mc_polling_lock); 519 ddi_report_dev(devi); 520 521 return (DDI_SUCCESS); 522 523 bad: 524 ddi_soft_state_free(mc_statep, instance); 525 return (DDI_FAILURE); 526 } 527 528 /* ARGSUSED */ 529 static int 530 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 531 { 532 int rv; 533 int instance; 534 mc_opl_t *mcp; 535 536 /* get the instance of this devi */ 537 instance = ddi_get_instance(devi); 538 if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) { 539 return (DDI_FAILURE); 540 } 541 542 switch (cmd) { 543 case DDI_SUSPEND: 544 rv = mc_suspend(mcp, MC_DRIVER_SUSPENDED); 545 return (rv); 546 case DDI_DETACH: 547 break; 548 default: 549 return (DDI_FAILURE); 550 } 551 552 delete_mcp(mcp); 553 if (mc_board_del(mcp) != DDI_SUCCESS) { 554 return (DDI_FAILURE); 555 } 556 557 /* free up the soft state */ 558 ddi_soft_state_free(mc_statep, instance); 559 560 return (DDI_SUCCESS); 561 } 562 563 /* ARGSUSED */ 564 static int 565 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp) 566 { 567 return (0); 568 } 569 570 /* ARGSUSED */ 571 static int 572 mc_close(dev_t devp, int flag, int otyp, cred_t *credp) 573 { 574 return (0); 575 } 576 577 /* ARGSUSED */ 578 static int 579 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 580 int *rvalp) 581 { 582 #ifdef DEBUG 583 return (mc_ioctl_debug(dev, cmd, arg, mode, credp, rvalp)); 584 #else 585 return (ENXIO); 586 #endif 587 } 588 589 /* 590 * PA validity check: 591 * This function return 1 if the PA is valid, otherwise 592 * return 0. 593 */ 594 595 /* ARGSUSED */ 596 static int 597 pa_is_valid(mc_opl_t *mcp, uint64_t addr) 598 { 599 /* 600 * Check if the addr is on the board. 601 */ 602 if ((addr < mcp->mc_start_address) || 603 (mcp->mc_start_address + mcp->mc_size <= addr)) 604 return (0); 605 606 if (mcp->mlist == NULL) 607 mc_get_mlist(mcp); 608 609 if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) { 610 return (1); 611 } 612 return (0); 613 } 614 615 /* 616 * mac-pa translation routines. 617 * 618 * Input: mc driver state, (LSB#, Bank#, DIMM address) 619 * Output: physical address 620 * 621 * Valid - return value: 0 622 * Invalid - return value: -1 623 */ 624 static int 625 mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa) 626 { 627 int i; 628 uint64_t pa_offset = 0; 629 int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1; 630 int bank = maddr->ma_bank; 631 mc_addr_t maddr1; 632 int bank0, bank1; 633 634 MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank, 635 maddr->ma_dimm_addr); 636 637 /* loc validity check */ 638 ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd); 639 ASSERT(bank >= 0 && OPL_BANK_MAX > bank); 640 641 /* Do translation */ 642 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 643 int pa_bit = 0; 644 int mc_bit = mcp->mc_trans_table[cs][i]; 645 if (mc_bit < MC_ADDRESS_BITS) { 646 pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1; 647 } else if (mc_bit == MP_NONE) { 648 pa_bit = 0; 649 } else if (mc_bit == MP_BANK_0) { 650 pa_bit = bank & 1; 651 } else if (mc_bit == MP_BANK_1) { 652 pa_bit = (bank >> 1) & 1; 653 } else if (mc_bit == MP_BANK_2) { 654 pa_bit = (bank >> 2) & 1; 655 } 656 pa_offset |= ((uint64_t)pa_bit) << i; 657 } 658 *pa = mcp->mc_start_address + pa_offset; 659 MC_LOG("pa = %lx\n", *pa); 660 661 if (pa_to_maddr(mcp, *pa, &maddr1) == -1) { 662 cmn_err(CE_WARN, "mcaddr_to_pa: /LSB%d/B%d/%x failed to " 663 "convert PA %lx\n", maddr->ma_bd, bank, 664 maddr->ma_dimm_addr, *pa); 665 return (-1); 666 } 667 668 /* 669 * In mirror mode, PA is always translated to the even bank. 670 */ 671 if (IS_MIRROR(mcp, maddr->ma_bank)) { 672 bank0 = maddr->ma_bank & ~(1); 673 bank1 = maddr1.ma_bank & ~(1); 674 } else { 675 bank0 = maddr->ma_bank; 676 bank1 = maddr1.ma_bank; 677 } 678 /* 679 * there is no need to check ma_bd because it is generated from 680 * mcp. They are the same. 681 */ 682 if ((bank0 == bank1) && 683 (maddr->ma_dimm_addr == maddr1.ma_dimm_addr)) { 684 return (0); 685 } else { 686 cmn_err(CE_WARN, "Translation error source /LSB%d/B%d/%x, " 687 "PA %lx, target /LSB%d/B%d/%x\n", 688 maddr->ma_bd, bank, maddr->ma_dimm_addr, 689 *pa, maddr1.ma_bd, maddr1.ma_bank, 690 maddr1.ma_dimm_addr); 691 return (-1); 692 } 693 } 694 695 /* 696 * PA to CS (used by pa_to_maddr). 697 */ 698 static int 699 pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset) 700 { 701 int i; 702 int cs = 0; 703 704 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 705 /* MAC address bit<29> is arranged on the same PA bit */ 706 /* on both table. So we may use any table. */ 707 if (mcp->mc_trans_table[0][i] == CS_SHIFT) { 708 cs = (pa_offset >> i) & 1; 709 break; 710 } 711 } 712 return (cs); 713 } 714 715 /* 716 * PA to DIMM (used by pa_to_maddr). 717 */ 718 /* ARGSUSED */ 719 static uint32_t 720 pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset) 721 { 722 int i; 723 int cs = pa_to_cs(mcp, pa_offset); 724 uint32_t dimm_addr = 0; 725 726 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 727 int pa_bit_value = (pa_offset >> i) & 1; 728 int mc_bit = mcp->mc_trans_table[cs][i]; 729 if (mc_bit < MC_ADDRESS_BITS) { 730 dimm_addr |= pa_bit_value << mc_bit; 731 } 732 } 733 return (dimm_addr); 734 } 735 736 /* 737 * PA to Bank (used by pa_to_maddr). 738 */ 739 static int 740 pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset) 741 { 742 int i; 743 int cs = pa_to_cs(mcp, pa_offset); 744 int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT]; 745 746 747 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 748 int pa_bit_value = (pa_offset >> i) & 1; 749 int mc_bit = mcp->mc_trans_table[cs][i]; 750 switch (mc_bit) { 751 case MP_BANK_0: 752 bankno |= pa_bit_value; 753 break; 754 case MP_BANK_1: 755 bankno |= pa_bit_value << 1; 756 break; 757 case MP_BANK_2: 758 bankno |= pa_bit_value << 2; 759 break; 760 } 761 } 762 763 return (bankno); 764 } 765 766 /* 767 * PA to MAC address translation 768 * 769 * Input: MAC driver state, physicall adress 770 * Output: LSB#, Bank id, mac address 771 * 772 * Valid - return value: 0 773 * Invalid - return value: -1 774 */ 775 776 int 777 pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr) 778 { 779 uint64_t pa_offset; 780 781 /* PA validity check */ 782 if (!pa_is_valid(mcp, pa)) 783 return (-1); 784 785 786 /* Do translation */ 787 pa_offset = pa - mcp->mc_start_address; 788 789 maddr->ma_bd = mcp->mc_board_num; 790 maddr->ma_bank = pa_to_bank(mcp, pa_offset); 791 maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset); 792 MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n", 793 pa_offset, maddr->ma_bd, maddr->ma_bank, maddr->ma_dimm_addr); 794 return (0); 795 } 796 797 /* 798 * UNUM format for DC is "/CMUnn/MEMxyZ", where 799 * nn = 00..03 for DC1 and 00..07 for DC2 and 00..15 for DC3. 800 * x = MAC 0..3 801 * y = 0..3 (slot info). 802 * Z = 'A' or 'B' 803 * 804 * UNUM format for FF1 is "/MBU_A/MEMBx/MEMyZ", where 805 * x = 0..3 (MEMB number) 806 * y = 0..3 (slot info). 807 * Z = 'A' or 'B' 808 * 809 * UNUM format for FF2 is "/MBU_B/MEMBx/MEMyZ" 810 * x = 0..7 (MEMB number) 811 * y = 0..3 (slot info). 812 * Z = 'A' or 'B' 813 */ 814 int 815 mc_set_mem_unum(char *buf, int buflen, int lsb, int bank, 816 uint32_t mf_type, uint32_t d_slot) 817 { 818 char *dimmnm; 819 char memb_num; 820 int sb; 821 int i; 822 823 if ((sb = mc_opl_get_physical_board(lsb)) < 0) 824 return (ENODEV); 825 826 if (plat_model == MODEL_DC) { 827 if (mf_type == FLT_TYPE_PERMANENT_CE) { 828 i = BD_BK_SLOT_TO_INDEX(0, bank, d_slot); 829 dimmnm = mc_dc_dimm_unum_table[i]; 830 snprintf(buf, buflen, "/%s%02d/MEM%s", 831 model_names[plat_model].unit_name, sb, dimmnm); 832 } else { 833 i = BD_BK_SLOT_TO_INDEX(0, bank, 0); 834 snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s MEM%s MEM%s", 835 model_names[plat_model].unit_name, sb, 836 mc_dc_dimm_unum_table[i], 837 mc_dc_dimm_unum_table[i + 1], 838 mc_dc_dimm_unum_table[i + 2], 839 mc_dc_dimm_unum_table[i + 3]); 840 } 841 } else { 842 i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 843 if (mf_type == FLT_TYPE_PERMANENT_CE) { 844 dimmnm = mc_ff_dimm_unum_table[i]; 845 memb_num = dimmnm[0]; 846 snprintf(buf, buflen, "/%s/%s%c/MEM%s", 847 model_names[plat_model].unit_name, 848 model_names[plat_model].mem_name, 849 memb_num, &dimmnm[1]); 850 } else { 851 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 852 memb_num = mc_ff_dimm_unum_table[i][0], 853 snprintf(buf, buflen, 854 "/%s/%s%c/MEM%s MEM%s MEM%s MEM%s", 855 model_names[plat_model].unit_name, 856 model_names[plat_model].mem_name, memb_num, 857 &mc_ff_dimm_unum_table[i][1], 858 &mc_ff_dimm_unum_table[i + 1][1], 859 &mc_ff_dimm_unum_table[i + 2][1], 860 &mc_ff_dimm_unum_table[i + 3][1]); 861 } 862 } 863 return (0); 864 } 865 866 static void 867 mc_ereport_post(mc_aflt_t *mc_aflt) 868 { 869 char buf[FM_MAX_CLASS]; 870 char device_path[MAXPATHLEN]; 871 char sid[MAXPATHLEN]; 872 nv_alloc_t *nva = NULL; 873 nvlist_t *ereport, *detector, *resource; 874 errorq_elem_t *eqep; 875 int nflts; 876 mc_flt_stat_t *flt_stat; 877 int i, n; 878 int blen = MAXPATHLEN; 879 char *p, *s = NULL; 880 uint32_t values[2], synd[2], dslot[2]; 881 uint64_t offset = (uint64_t)-1; 882 int ret = -1; 883 884 if (panicstr) { 885 eqep = errorq_reserve(ereport_errorq); 886 if (eqep == NULL) 887 return; 888 ereport = errorq_elem_nvl(ereport_errorq, eqep); 889 nva = errorq_elem_nva(ereport_errorq, eqep); 890 } else { 891 ereport = fm_nvlist_create(nva); 892 } 893 894 /* 895 * Create the scheme "dev" FMRI. 896 */ 897 detector = fm_nvlist_create(nva); 898 resource = fm_nvlist_create(nva); 899 900 nflts = mc_aflt->mflt_nflts; 901 902 ASSERT(nflts >= 1 && nflts <= 2); 903 904 flt_stat = mc_aflt->mflt_stat[0]; 905 (void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path); 906 (void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL, 907 device_path, NULL); 908 909 /* 910 * Encode all the common data into the ereport. 911 */ 912 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s", 913 MC_OPL_ERROR_CLASS, 914 mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS : 915 MC_OPL_MI_SUBCLASS, 916 mc_aflt->mflt_erpt_class); 917 918 MC_LOG("mc_ereport_post: ereport %s\n", buf); 919 920 921 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 922 fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1), 923 detector, NULL); 924 925 /* 926 * Set payload. 927 */ 928 fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32, 929 flt_stat->mf_flt_maddr.ma_bd, NULL); 930 931 fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64, 932 flt_stat->mf_flt_paddr, NULL); 933 934 if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 935 fm_payload_set(ereport, MC_OPL_FLT_TYPE, 936 DATA_TYPE_UINT8, ECC_STICKY, NULL); 937 } 938 939 for (i = 0; i < nflts; i++) 940 values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank; 941 942 fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY, 943 nflts, values, NULL); 944 945 for (i = 0; i < nflts; i++) 946 values[i] = mc_aflt->mflt_stat[i]->mf_cntl; 947 948 fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY, 949 nflts, values, NULL); 950 951 for (i = 0; i < nflts; i++) 952 values[i] = mc_aflt->mflt_stat[i]->mf_err_add; 953 954 /* offset is set only for PCE */ 955 if (mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_PERMANENT_CE) { 956 offset = values[0]; 957 958 } 959 fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY, 960 nflts, values, NULL); 961 962 for (i = 0; i < nflts; i++) 963 values[i] = mc_aflt->mflt_stat[i]->mf_err_log; 964 965 fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY, 966 nflts, values, NULL); 967 968 for (i = 0; i < nflts; i++) { 969 flt_stat = mc_aflt->mflt_stat[i]; 970 if (flt_stat->mf_errlog_valid) { 971 synd[i] = flt_stat->mf_synd; 972 dslot[i] = flt_stat->mf_dimm_slot; 973 values[i] = flt_stat->mf_dram_place; 974 } else { 975 synd[i] = 0; 976 dslot[i] = 0; 977 values[i] = 0; 978 } 979 } 980 981 fm_payload_set(ereport, MC_OPL_ERR_SYND, 982 DATA_TYPE_UINT32_ARRAY, nflts, synd, NULL); 983 984 fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT, 985 DATA_TYPE_UINT32_ARRAY, nflts, dslot, NULL); 986 987 fm_payload_set(ereport, MC_OPL_ERR_DRAM, 988 DATA_TYPE_UINT32_ARRAY, nflts, values, NULL); 989 990 device_path[0] = 0; 991 p = &device_path[0]; 992 sid[0] = 0; 993 s = &sid[0]; 994 ret = 0; 995 996 for (i = 0; i < nflts; i++) { 997 int bank; 998 999 flt_stat = mc_aflt->mflt_stat[i]; 1000 bank = flt_stat->mf_flt_maddr.ma_bank; 1001 ret = mc_set_mem_unum(p + strlen(p), blen, 1002 flt_stat->mf_flt_maddr.ma_bd, bank, flt_stat->mf_type, 1003 flt_stat->mf_dimm_slot); 1004 1005 if (ret != 0) { 1006 cmn_err(CE_WARN, 1007 "mc_ereport_post: Failed to determine the unum " 1008 "for board=%d bank=%d type=0x%x slot=0x%x", 1009 flt_stat->mf_flt_maddr.ma_bd, bank, 1010 flt_stat->mf_type, flt_stat->mf_dimm_slot); 1011 continue; 1012 } 1013 n = strlen(device_path); 1014 blen = MAXPATHLEN - n; 1015 p = &device_path[n]; 1016 if (i < (nflts - 1)) { 1017 snprintf(p, blen, " "); 1018 blen--; 1019 p++; 1020 } 1021 1022 if (ret == 0) { 1023 ret = mc_set_mem_sid(mc_aflt->mflt_mcp, s + strlen(s), 1024 blen, flt_stat->mf_flt_maddr.ma_bd, bank, 1025 flt_stat->mf_type, flt_stat->mf_dimm_slot); 1026 1027 } 1028 } 1029 1030 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, 1031 NULL, device_path, (ret == 0) ? sid : NULL, 1032 (ret == 0) ? offset : (uint64_t)-1); 1033 1034 fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST, 1035 resource, NULL); 1036 1037 if (panicstr) { 1038 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1039 } else { 1040 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1041 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1042 fm_nvlist_destroy(detector, FM_NVA_FREE); 1043 fm_nvlist_destroy(resource, FM_NVA_FREE); 1044 } 1045 } 1046 1047 1048 static void 1049 mc_err_drain(mc_aflt_t *mc_aflt) 1050 { 1051 int rv; 1052 uint64_t pa = (uint64_t)(-1); 1053 int i; 1054 1055 MC_LOG("mc_err_drain: %s\n", 1056 mc_aflt->mflt_erpt_class); 1057 /* 1058 * we come here only when we have: 1059 * In mirror mode: CMPE, MUE, SUE 1060 * In normal mode: UE, Permanent CE 1061 */ 1062 for (i = 0; i < mc_aflt->mflt_nflts; i++) { 1063 rv = mcaddr_to_pa(mc_aflt->mflt_mcp, 1064 &(mc_aflt->mflt_stat[i]->mf_flt_maddr), &pa); 1065 if (rv == 0) 1066 mc_aflt->mflt_stat[i]->mf_flt_paddr = pa; 1067 else 1068 mc_aflt->mflt_stat[i]->mf_flt_paddr = (uint64_t)-1; 1069 } 1070 1071 if (mc_aflt->mflt_stat[0]->mf_type != FLT_TYPE_PERMANENT_CE) { 1072 uint64_t errors = 0; 1073 1074 MC_LOG("mc_err_drain:pa = %lx\n", pa); 1075 1076 if (page_retire_check(pa, &errors) == 0) { 1077 MC_LOG("Page retired\n"); 1078 return; 1079 } 1080 if ((errors & mc_aflt->mflt_pr) == mc_aflt->mflt_pr) { 1081 MC_LOG("errors %lx, mflt_pr %x\n", 1082 errors, mc_aflt->mflt_pr); 1083 return; 1084 } 1085 MC_LOG("offline page at pa %lx error %x\n", pa, 1086 mc_aflt->mflt_pr); 1087 (void) page_retire(pa, mc_aflt->mflt_pr); 1088 } 1089 1090 for (i = 0; i < mc_aflt->mflt_nflts; i++) { 1091 mc_aflt_t mc_aflt0; 1092 if (mc_aflt->mflt_stat[i]->mf_flt_paddr != (uint64_t)-1) { 1093 mc_aflt0 = *mc_aflt; 1094 mc_aflt0.mflt_nflts = 1; 1095 mc_aflt0.mflt_stat[0] = mc_aflt->mflt_stat[i]; 1096 mc_ereport_post(&mc_aflt0); 1097 } 1098 } 1099 } 1100 1101 /* 1102 * The restart address is actually defined in unit of PA[37:6] 1103 * the mac patrol will convert that to dimm offset. If the 1104 * address is not in the bank, it will continue to search for 1105 * the next PA that is within the bank. 1106 * 1107 * Also the mac patrol scans the dimms based on PA, not 1108 * dimm offset. 1109 */ 1110 static int 1111 restart_patrol(mc_opl_t *mcp, int bank, mc_addr_info_t *maddr_info) 1112 { 1113 uint64_t pa; 1114 int rv; 1115 int loop_count = 0; 1116 1117 if (maddr_info == NULL || (maddr_info->mi_valid == 0)) { 1118 MAC_PTRL_START(mcp, bank); 1119 return (0); 1120 } 1121 1122 rv = mcaddr_to_pa(mcp, &maddr_info->mi_maddr, &pa); 1123 if (rv != 0) { 1124 MC_LOG("cannot convert mcaddr to pa. use auto restart\n"); 1125 MAC_PTRL_START(mcp, bank); 1126 return (0); 1127 } 1128 1129 /* 1130 * pa is the last address scanned by the mac patrol 1131 * we calculate the next restart address as follows: 1132 * first we always advance it by 64 byte. Then begin the loop. 1133 * loop { 1134 * if it is not in phys_install, we advance to next 64 MB boundary 1135 * if it is not backed by a page structure, done 1136 * if the page is bad, advance to the next page boundary. 1137 * else done 1138 * if the new address exceeds the board, wrap around. 1139 * } <stop if we come back to the same page> 1140 */ 1141 1142 if (pa < mcp->mc_start_address || pa >= (mcp->mc_start_address 1143 + mcp->mc_size)) { 1144 /* pa is not on this board, just retry */ 1145 cmn_err(CE_WARN, "restart_patrol: invalid address %lx " 1146 "on board %d\n", pa, mcp->mc_board_num); 1147 MAC_PTRL_START(mcp, bank); 1148 return (0); 1149 } 1150 1151 MC_LOG("restart_patrol: pa = %lx\n", pa); 1152 if (maddr_info->mi_advance) { 1153 uint64_t new_pa; 1154 1155 if (IS_MIRROR(mcp, bank)) 1156 new_pa = pa + 64 * 2; 1157 else 1158 new_pa = pa + 64; 1159 1160 if (!mc_valid_pa(mcp, new_pa)) { 1161 MC_LOG("Invalid PA\n"); 1162 pa = roundup(new_pa + 1, mc_isolation_bsize); 1163 } else { 1164 uint64_t errors = 0; 1165 if (page_retire_check(new_pa, &errors) && 1166 (errors == 0)) { 1167 MC_LOG("Page has no error\n"); 1168 pa = new_pa; 1169 goto done; 1170 } 1171 /* 1172 * skip bad pages 1173 * and let the following loop to take care 1174 */ 1175 pa = roundup(new_pa + 1, PAGESIZE); 1176 MC_LOG("Skipping bad page to %lx\n", pa); 1177 } 1178 } 1179 1180 /* 1181 * if we wrap around twice, we just give up and let 1182 * mac patrol decide. 1183 */ 1184 MC_LOG("pa is now %lx\n", pa); 1185 while (loop_count <= 1) { 1186 if (!mc_valid_pa(mcp, pa)) { 1187 MC_LOG("pa is not valid. round up to 64 MB\n"); 1188 pa = roundup(pa + 1, 64 * 1024 * 1024); 1189 } else { 1190 uint64_t errors = 0; 1191 if (page_retire_check(pa, &errors) && 1192 (errors == 0)) { 1193 MC_LOG("Page has no error\n"); 1194 break; 1195 } 1196 /* skip bad pages */ 1197 pa = roundup(pa + 1, PAGESIZE); 1198 MC_LOG("Skipping bad page to %lx\n", pa); 1199 } 1200 if (pa >= (mcp->mc_start_address + mcp->mc_size)) { 1201 MC_LOG("Wrap around\n"); 1202 pa = mcp->mc_start_address; 1203 loop_count++; 1204 } 1205 } 1206 1207 done: 1208 /* retstart MAC patrol: PA[37:6] */ 1209 MC_LOG("restart at pa = %lx\n", pa); 1210 ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa)); 1211 MAC_PTRL_START_ADD(mcp, bank); 1212 1213 return (0); 1214 } 1215 1216 /* 1217 * Rewriting is used for two purposes. 1218 * - to correct the error in memory. 1219 * - to determine whether the error is permanent or intermittent. 1220 * It's done by writing the address in MAC_BANKm_REWRITE_ADD 1221 * and issuing REW_REQ command in MAC_BANKm_PTRL_CNRL. After that, 1222 * REW_END (and REW_CE/REW_UE if some error detected) is set when 1223 * rewrite operation is done. See 4.7.3 and 4.7.11 in Columbus2 PRM. 1224 * 1225 * Note that rewrite operation doesn't change RAW_UE to Marked UE. 1226 * Therefore, we use it only CE case. 1227 */ 1228 static uint32_t 1229 do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr) 1230 { 1231 uint32_t cntl; 1232 int count = 0; 1233 1234 /* first wait to make sure PTRL_STATUS is 0 */ 1235 while (count++ < mc_max_rewrite_loop) { 1236 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 1237 if (!(cntl & MAC_CNTL_PTRL_STATUS)) 1238 break; 1239 drv_usecwait(mc_rewrite_delay); 1240 } 1241 if (count >= mc_max_rewrite_loop) 1242 goto bad; 1243 1244 count = 0; 1245 1246 ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr); 1247 MAC_REW_REQ(mcp, bank); 1248 1249 do { 1250 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 1251 if (count++ >= mc_max_rewrite_loop) { 1252 goto bad; 1253 } else { 1254 drv_usecwait(mc_rewrite_delay); 1255 } 1256 /* 1257 * If there are other MEMORY or PCI activities, this 1258 * will be BUSY, else it should be set immediately 1259 */ 1260 } while (!(cntl & MAC_CNTL_REW_END)); 1261 1262 MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS); 1263 return (cntl); 1264 bad: 1265 /* This is bad. Just reset the circuit */ 1266 cmn_err(CE_WARN, "mc-opl rewrite timeout on /LSB%d/B%d\n", 1267 mcp->mc_board_num, bank); 1268 cntl = MAC_CNTL_REW_END; 1269 MAC_CMD(mcp, bank, MAC_CNTL_PTRL_RESET); 1270 MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS); 1271 return (cntl); 1272 } 1273 void 1274 mc_process_scf_log(mc_opl_t *mcp) 1275 { 1276 int count; 1277 int n = 0; 1278 scf_log_t *p; 1279 int bank; 1280 1281 for (bank = 0; bank < BANKNUM_PER_SB; bank++) { 1282 while ((p = mcp->mc_scf_log[bank]) != NULL && 1283 (n < mc_max_errlog_processed)) { 1284 ASSERT(bank == p->sl_bank); 1285 count = 0; 1286 while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank)) 1287 & MAC_STATIC_ERR_VLD)) { 1288 if (count++ >= (mc_max_scf_loop)) { 1289 break; 1290 } 1291 drv_usecwait(mc_scf_delay); 1292 } 1293 1294 if (count < mc_max_scf_loop) { 1295 ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank), 1296 p->sl_err_log); 1297 1298 ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank), 1299 p->sl_err_add|MAC_STATIC_ERR_VLD); 1300 mcp->mc_scf_retry[bank] = 0; 1301 } else { 1302 /* if we try too many times, just drop the req */ 1303 if (mcp->mc_scf_retry[bank]++ <= mc_max_scf_retry) { 1304 return; 1305 } else { 1306 if ((++mc_pce_dropped & 0xff) == 0) { 1307 cmn_err(CE_WARN, 1308 "Cannot report Permanent CE to SCF\n"); 1309 } 1310 } 1311 } 1312 n++; 1313 mcp->mc_scf_log[bank] = p->sl_next; 1314 mcp->mc_scf_total[bank]--; 1315 ASSERT(mcp->mc_scf_total[bank] >= 0); 1316 kmem_free(p, sizeof (scf_log_t)); 1317 } 1318 } 1319 } 1320 void 1321 mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank) 1322 { 1323 scf_log_t *p; 1324 1325 if (mcp->mc_scf_total[bank] >= mc_max_scf_logs) { 1326 if ((++mc_pce_dropped & 0xff) == 0) { 1327 cmn_err(CE_WARN, "Too many Permanent CE requests.\n"); 1328 } 1329 return; 1330 } 1331 p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP); 1332 p->sl_next = 0; 1333 p->sl_err_add = flt_stat->mf_err_add; 1334 p->sl_err_log = flt_stat->mf_err_log; 1335 p->sl_bank = bank; 1336 1337 if (mcp->mc_scf_log[bank] == NULL) { 1338 /* 1339 * we rely on mc_scf_log to detect NULL queue. 1340 * mc_scf_log_tail is irrelevant is such case. 1341 */ 1342 mcp->mc_scf_log_tail[bank] = mcp->mc_scf_log[bank] = p; 1343 } else { 1344 mcp->mc_scf_log_tail[bank]->sl_next = p; 1345 mcp->mc_scf_log_tail[bank] = p; 1346 } 1347 mcp->mc_scf_total[bank]++; 1348 } 1349 /* 1350 * This routine determines what kind of CE happens, intermittent 1351 * or permanent as follows. (See 4.7.3 in Columbus2 PRM.) 1352 * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register. 1353 * - If CE is still detected on the same address even after doing 1354 * rewrite operation twice, it is determined as permanent error. 1355 * - If error is not detected anymore, it is determined as intermittent 1356 * error. 1357 * - If UE is detected due to rewrite operation, it should be treated 1358 * as UE. 1359 */ 1360 1361 /* ARGSUSED */ 1362 static void 1363 mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error) 1364 { 1365 uint32_t cntl; 1366 int i; 1367 1368 flt_stat->mf_type = FLT_TYPE_PERMANENT_CE; 1369 /* 1370 * rewrite request 1st time reads and correct error data 1371 * and write to DIMM. 2nd rewrite request must be issued 1372 * after REW_CE/UE/END is 0. When the 2nd request is completed, 1373 * if REW_CE = 1, then it is permanent CE. 1374 */ 1375 for (i = 0; i < 2; i++) { 1376 cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add); 1377 /* 1378 * If the error becomes UE or CMPE 1379 * we return to the caller immediately. 1380 */ 1381 if (cntl & MAC_CNTL_REW_UE) { 1382 if (ptrl_error) 1383 flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE; 1384 else 1385 flt_stat->mf_cntl |= MAC_CNTL_MI_UE; 1386 flt_stat->mf_type = FLT_TYPE_UE; 1387 return; 1388 } 1389 if (cntl & MAC_CNTL_REW_CMPE) { 1390 if (ptrl_error) 1391 flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE; 1392 else 1393 flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE; 1394 flt_stat->mf_type = FLT_TYPE_CMPE; 1395 return; 1396 } 1397 } 1398 if (!(cntl & MAC_CNTL_REW_CE)) { 1399 flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE; 1400 } 1401 1402 if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 1403 /* report PERMANENT_CE to SP via SCF */ 1404 if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) { 1405 mc_queue_scf_log(mcp, flt_stat, bank); 1406 } 1407 } 1408 } 1409 1410 #define IS_CMPE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE :\ 1411 MAC_CNTL_MI_CMPE)) 1412 #define IS_UE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE)) 1413 #define IS_CE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE)) 1414 #define IS_OK(cntl, f) (!((cntl) & ((f) ? MAC_CNTL_PTRL_ERRS : \ 1415 MAC_CNTL_MI_ERRS))) 1416 1417 1418 static int 1419 IS_CE_ONLY(uint32_t cntl, int ptrl_error) 1420 { 1421 if (ptrl_error) { 1422 return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE); 1423 } else { 1424 return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE); 1425 } 1426 } 1427 1428 void 1429 mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value) 1430 { 1431 if (mcp->mc_speedup_period[bank] > 0) 1432 value |= mc_max_speed; 1433 else 1434 value |= mcp->mc_speed; 1435 ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value); 1436 } 1437 1438 static void 1439 mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat) 1440 { 1441 flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1442 MAC_CNTL_PTRL_ERRS; 1443 flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank)); 1444 flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank)); 1445 flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num; 1446 flt_stat->mf_flt_maddr.ma_bank = bank; 1447 flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add; 1448 } 1449 1450 static void 1451 mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat) 1452 { 1453 uint32_t status, old_status; 1454 1455 status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1456 MAC_CNTL_MI_ERRS; 1457 old_status = 0; 1458 1459 /* we keep reading until the status is stable */ 1460 while (old_status != status) { 1461 old_status = status; 1462 flt_stat->mf_err_add = 1463 LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank)); 1464 flt_stat->mf_err_log = 1465 LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank)); 1466 status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1467 MAC_CNTL_MI_ERRS; 1468 if (status == old_status) { 1469 break; 1470 } 1471 } 1472 1473 flt_stat->mf_cntl = status; 1474 flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num; 1475 flt_stat->mf_flt_maddr.ma_bank = bank; 1476 flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add; 1477 } 1478 1479 1480 /* 1481 * Error philosophy for mirror mode: 1482 * 1483 * PTRL (The error address for both banks are same, since ptrl stops if it 1484 * detects error.) 1485 * - Compaire error Report CMPE. 1486 * 1487 * - UE-UE Report MUE. No rewrite. 1488 * 1489 * - UE-* UE-(CE/OK). Rewrite to scrub UE. Report SUE. 1490 * 1491 * - CE-* CE-(CE/OK). Scrub to determine if CE is permanent. 1492 * If CE is permanent, inform SCF. Once for each 1493 * Dimm. If CE becomes UE or CMPE, go back to above. 1494 * 1495 * 1496 * MI (The error addresses for each bank are the same or different.) 1497 * - Compair error If addresses are the same. Just CMPE. 1498 * If addresses are different (this could happen 1499 * as a result of scrubbing. Report each seperately. 1500 * Only report error info on each side. 1501 * 1502 * - UE-UE Addresses are the same. Report MUE. 1503 * Addresses are different. Report SUE on each bank. 1504 * Rewrite to clear UE. 1505 * 1506 * - UE-* UE-(CE/OK) 1507 * Rewrite to clear UE. Report SUE for the bank. 1508 * 1509 * - CE-* CE-(CE/OK). Scrub to determine if CE is permanent. 1510 * If CE becomes UE or CMPE, go back to above. 1511 * 1512 */ 1513 1514 static int 1515 mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat) 1516 { 1517 int ptrl_error = mc_aflt->mflt_is_ptrl; 1518 int i; 1519 int rv = 0; 1520 1521 MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n", 1522 flt_stat[0].mf_cntl, flt_stat[1].mf_cntl); 1523 1524 if (ptrl_error) { 1525 if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) 1526 & MAC_CNTL_PTRL_ERRS) == 0) 1527 return (0); 1528 } else { 1529 if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) 1530 & MAC_CNTL_MI_ERRS) == 0) 1531 return (0); 1532 } 1533 1534 /* 1535 * First we take care of the case of CE 1536 * because they can become UE or CMPE 1537 */ 1538 for (i = 0; i < 2; i++) { 1539 if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) { 1540 MC_LOG("CE detected on bank %d\n", 1541 flt_stat[i].mf_flt_maddr.ma_bank); 1542 mc_scrub_ce(mcp, flt_stat[i].mf_flt_maddr.ma_bank, 1543 &flt_stat[i], ptrl_error); 1544 rv = 1; 1545 } 1546 } 1547 1548 /* The above scrubbing can turn CE into UE or CMPE */ 1549 1550 /* 1551 * Now we distinguish two cases: same address or not 1552 * the same address. It might seem more intuitive to 1553 * distinguish PTRL v.s. MI error but it is more 1554 * complicated that way. 1555 */ 1556 1557 if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) { 1558 1559 if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) || 1560 IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) { 1561 flt_stat[0].mf_type = FLT_TYPE_CMPE; 1562 flt_stat[1].mf_type = FLT_TYPE_CMPE; 1563 mc_aflt->mflt_erpt_class = MC_OPL_CMPE; 1564 MC_LOG("cmpe error detected\n"); 1565 mc_aflt->mflt_nflts = 2; 1566 mc_aflt->mflt_stat[0] = &flt_stat[0]; 1567 mc_aflt->mflt_stat[1] = &flt_stat[1]; 1568 mc_aflt->mflt_pr = PR_UE; 1569 mc_err_drain(mc_aflt); 1570 return (1); 1571 } 1572 1573 if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) && 1574 IS_UE(flt_stat[1].mf_cntl, ptrl_error)) { 1575 /* Both side are UE's */ 1576 1577 MAC_SET_ERRLOG_INFO(&flt_stat[0]); 1578 MAC_SET_ERRLOG_INFO(&flt_stat[1]); 1579 MC_LOG("MUE detected\n"); 1580 flt_stat[0].mf_type = FLT_TYPE_MUE; 1581 flt_stat[1].mf_type = FLT_TYPE_MUE; 1582 mc_aflt->mflt_erpt_class = MC_OPL_MUE; 1583 mc_aflt->mflt_nflts = 2; 1584 mc_aflt->mflt_stat[0] = &flt_stat[0]; 1585 mc_aflt->mflt_stat[1] = &flt_stat[1]; 1586 mc_aflt->mflt_pr = PR_UE; 1587 mc_err_drain(mc_aflt); 1588 return (1); 1589 } 1590 1591 /* Now the only case is UE/CE, UE/OK, or don't care */ 1592 for (i = 0; i < 2; i++) { 1593 if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) { 1594 1595 /* rewrite can clear the one side UE error */ 1596 1597 if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) { 1598 (void) do_rewrite(mcp, 1599 flt_stat[i].mf_flt_maddr.ma_bank, 1600 flt_stat[i].mf_flt_maddr.ma_dimm_addr); 1601 } 1602 flt_stat[i].mf_type = FLT_TYPE_UE; 1603 MAC_SET_ERRLOG_INFO(&flt_stat[i]); 1604 mc_aflt->mflt_erpt_class = MC_OPL_SUE; 1605 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1606 mc_aflt->mflt_nflts = 1; 1607 mc_aflt->mflt_pr = PR_MCE; 1608 mc_err_drain(mc_aflt); 1609 /* Once we hit a UE/CE or UE/OK case, done */ 1610 return (1); 1611 } 1612 } 1613 1614 } else { 1615 /* 1616 * addresses are different. That means errors 1617 * on the 2 banks are not related at all. 1618 */ 1619 for (i = 0; i < 2; i++) { 1620 if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) { 1621 flt_stat[i].mf_type = FLT_TYPE_CMPE; 1622 mc_aflt->mflt_erpt_class = MC_OPL_CMPE; 1623 MC_LOG("cmpe error detected\n"); 1624 mc_aflt->mflt_nflts = 1; 1625 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1626 mc_aflt->mflt_pr = PR_UE; 1627 mc_err_drain(mc_aflt); 1628 /* no more report on this bank */ 1629 flt_stat[i].mf_cntl = 0; 1630 rv = 1; 1631 } 1632 } 1633 1634 /* rewrite can clear the one side UE error */ 1635 1636 for (i = 0; i < 2; i++) { 1637 if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) { 1638 (void) do_rewrite(mcp, 1639 flt_stat[i].mf_flt_maddr.ma_bank, 1640 flt_stat[i].mf_flt_maddr.ma_dimm_addr); 1641 flt_stat[i].mf_type = FLT_TYPE_UE; 1642 MAC_SET_ERRLOG_INFO(&flt_stat[i]); 1643 mc_aflt->mflt_erpt_class = MC_OPL_SUE; 1644 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1645 mc_aflt->mflt_nflts = 1; 1646 mc_aflt->mflt_pr = PR_MCE; 1647 mc_err_drain(mc_aflt); 1648 rv = 1; 1649 } 1650 } 1651 } 1652 return (rv); 1653 } 1654 static void 1655 mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_addr_info_t *maddr) 1656 { 1657 mc_aflt_t mc_aflt; 1658 mc_flt_stat_t flt_stat[2], mi_flt_stat[2]; 1659 int i; 1660 int mi_valid; 1661 1662 bzero(&mc_aflt, sizeof (mc_aflt_t)); 1663 bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t)); 1664 bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t)); 1665 1666 mc_aflt.mflt_mcp = mcp; 1667 mc_aflt.mflt_id = gethrtime(); 1668 1669 /* Now read all the registers into flt_stat */ 1670 1671 for (i = 0; i < 2; i++) { 1672 MC_LOG("Reading registers of bank %d\n", bank); 1673 /* patrol registers */ 1674 mc_read_ptrl_reg(mcp, bank, &flt_stat[i]); 1675 1676 ASSERT(maddr); 1677 maddr->mi_maddr = flt_stat[i].mf_flt_maddr; 1678 1679 MC_LOG("ptrl registers cntl %x add %x log %x\n", 1680 flt_stat[i].mf_cntl, 1681 flt_stat[i].mf_err_add, 1682 flt_stat[i].mf_err_log); 1683 1684 /* MI registers */ 1685 mc_read_mi_reg(mcp, bank, &mi_flt_stat[i]); 1686 1687 MC_LOG("MI registers cntl %x add %x log %x\n", 1688 mi_flt_stat[i].mf_cntl, 1689 mi_flt_stat[i].mf_err_add, 1690 mi_flt_stat[i].mf_err_log); 1691 1692 bank = bank^1; 1693 } 1694 1695 /* clear errors once we read all the registers */ 1696 MAC_CLEAR_ERRS(mcp, bank, 1697 (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1698 1699 MAC_CLEAR_ERRS(mcp, bank ^ 1, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1700 1701 /* Process MI errors first */ 1702 1703 /* if not error mode, cntl1 is 0 */ 1704 if ((mi_flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) || 1705 (mi_flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID)) 1706 mi_flt_stat[0].mf_cntl = 0; 1707 1708 if ((mi_flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) || 1709 (mi_flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID)) 1710 mi_flt_stat[1].mf_cntl = 0; 1711 1712 mc_aflt.mflt_is_ptrl = 0; 1713 mi_valid = mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]); 1714 1715 if ((((flt_stat[0].mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1716 MAC_CNTL_PTRL_ERR_SHIFT) == 1717 ((mi_flt_stat[0].mf_cntl & MAC_CNTL_MI_ERRS) >> 1718 MAC_CNTL_MI_ERR_SHIFT)) && 1719 (flt_stat[0].mf_err_add == mi_flt_stat[0].mf_err_add) && 1720 (((flt_stat[1].mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1721 MAC_CNTL_PTRL_ERR_SHIFT) == 1722 ((mi_flt_stat[1].mf_cntl & MAC_CNTL_MI_ERRS) >> 1723 MAC_CNTL_MI_ERR_SHIFT)) && 1724 (flt_stat[1].mf_err_add == mi_flt_stat[1].mf_err_add)) { 1725 #ifdef DEBUG 1726 MC_LOG("discarding PTRL error because " 1727 "it is the same as MI\n"); 1728 #endif 1729 maddr->mi_valid = mi_valid; 1730 return; 1731 } 1732 /* if not error mode, cntl1 is 0 */ 1733 if ((flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) || 1734 (flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID)) 1735 flt_stat[0].mf_cntl = 0; 1736 1737 if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) || 1738 (flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID)) 1739 flt_stat[1].mf_cntl = 0; 1740 1741 mc_aflt.mflt_is_ptrl = 1; 1742 maddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]); 1743 } 1744 static int 1745 mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt, 1746 mc_flt_stat_t *flt_stat) 1747 { 1748 int ptrl_error = mc_aflt->mflt_is_ptrl; 1749 int rv = 0; 1750 1751 mc_aflt->mflt_erpt_class = NULL; 1752 if (IS_UE(flt_stat->mf_cntl, ptrl_error)) { 1753 MC_LOG("UE deteceted\n"); 1754 flt_stat->mf_type = FLT_TYPE_UE; 1755 mc_aflt->mflt_erpt_class = MC_OPL_UE; 1756 mc_aflt->mflt_pr = PR_UE; 1757 MAC_SET_ERRLOG_INFO(flt_stat); 1758 rv = 1; 1759 } else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) { 1760 MC_LOG("CE deteceted\n"); 1761 MAC_SET_ERRLOG_INFO(flt_stat); 1762 1763 /* Error type can change after scrubing */ 1764 mc_scrub_ce(mcp, bank, flt_stat, ptrl_error); 1765 1766 if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 1767 mc_aflt->mflt_erpt_class = MC_OPL_CE; 1768 mc_aflt->mflt_pr = PR_MCE; 1769 } else if (flt_stat->mf_type == FLT_TYPE_UE) { 1770 mc_aflt->mflt_erpt_class = MC_OPL_UE; 1771 mc_aflt->mflt_pr = PR_UE; 1772 } 1773 rv = 1; 1774 } 1775 MC_LOG("mc_process_error: fault type %x erpt %s\n", 1776 flt_stat->mf_type, 1777 mc_aflt->mflt_erpt_class); 1778 if (mc_aflt->mflt_erpt_class) { 1779 mc_aflt->mflt_stat[0] = flt_stat; 1780 mc_aflt->mflt_nflts = 1; 1781 mc_err_drain(mc_aflt); 1782 } 1783 return (rv); 1784 } 1785 1786 static void 1787 mc_error_handler(mc_opl_t *mcp, int bank, mc_addr_info_t *maddr) 1788 { 1789 mc_aflt_t mc_aflt; 1790 mc_flt_stat_t flt_stat, mi_flt_stat; 1791 int mi_valid; 1792 1793 bzero(&mc_aflt, sizeof (mc_aflt_t)); 1794 bzero(&flt_stat, sizeof (mc_flt_stat_t)); 1795 bzero(&mi_flt_stat, sizeof (mc_flt_stat_t)); 1796 1797 mc_aflt.mflt_mcp = mcp; 1798 mc_aflt.mflt_id = gethrtime(); 1799 1800 /* patrol registers */ 1801 mc_read_ptrl_reg(mcp, bank, &flt_stat); 1802 1803 ASSERT(maddr); 1804 maddr->mi_maddr = flt_stat.mf_flt_maddr; 1805 1806 MC_LOG("ptrl registers cntl %x add %x log %x\n", 1807 flt_stat.mf_cntl, 1808 flt_stat.mf_err_add, 1809 flt_stat.mf_err_log); 1810 1811 /* MI registers */ 1812 mc_read_mi_reg(mcp, bank, &mi_flt_stat); 1813 1814 1815 MC_LOG("MI registers cntl %x add %x log %x\n", 1816 mi_flt_stat.mf_cntl, 1817 mi_flt_stat.mf_err_add, 1818 mi_flt_stat.mf_err_log); 1819 1820 /* clear errors once we read all the registers */ 1821 MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1822 1823 mc_aflt.mflt_is_ptrl = 0; 1824 if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) && 1825 ((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) && 1826 ((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) { 1827 mi_valid = mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat); 1828 } 1829 1830 if ((((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1831 MAC_CNTL_PTRL_ERR_SHIFT) == 1832 ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) >> 1833 MAC_CNTL_MI_ERR_SHIFT)) && 1834 (flt_stat.mf_err_add == mi_flt_stat.mf_err_add)) { 1835 #ifdef DEBUG 1836 MC_LOG("discarding PTRL error because " 1837 "it is the same as MI\n"); 1838 #endif 1839 maddr->mi_valid = mi_valid; 1840 return; 1841 } 1842 1843 mc_aflt.mflt_is_ptrl = 1; 1844 if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) && 1845 ((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) && 1846 ((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) { 1847 maddr->mi_valid = mc_process_error(mcp, bank, 1848 &mc_aflt, &flt_stat); 1849 } 1850 } 1851 /* 1852 * memory patrol error handling algorithm: 1853 * timeout() is used to do periodic polling 1854 * This is the flow chart. 1855 * timeout -> 1856 * mc_check_errors() 1857 * if memory bank is installed, read the status register 1858 * if any error bit is set, 1859 * -> mc_error_handler() 1860 * -> read all error regsiters 1861 * -> mc_process_error() 1862 * determine error type 1863 * rewrite to clear error or scrub to determine CE type 1864 * inform SCF on permanent CE 1865 * -> mc_err_drain 1866 * page offline processing 1867 * -> mc_ereport_post() 1868 */ 1869 1870 static void 1871 mc_check_errors_func(mc_opl_t *mcp) 1872 { 1873 mc_addr_info_t maddr_info; 1874 int i, error_count = 0; 1875 uint32_t stat, cntl; 1876 int running; 1877 int wrapped; 1878 1879 /* 1880 * scan errors. 1881 */ 1882 if (mcp->mc_status & MC_MEMORYLESS) 1883 return; 1884 1885 for (i = 0; i < BANKNUM_PER_SB; i++) { 1886 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 1887 stat = ldphysio(MAC_PTRL_STAT(mcp, i)); 1888 cntl = ldphysio(MAC_PTRL_CNTL(mcp, i)); 1889 running = cntl & MAC_CNTL_PTRL_START; 1890 wrapped = cntl & MAC_CNTL_PTRL_ADD_MAX; 1891 1892 if (mc_debug_show_all || stat) { 1893 MC_LOG("/LSB%d/B%d stat %x cntl %x\n", 1894 mcp->mc_board_num, i, 1895 stat, cntl); 1896 } 1897 1898 /* 1899 * Update stats and reset flag if the HW patrol 1900 * wrapped around in its scan. 1901 */ 1902 if (wrapped) { 1903 mcp->mc_period[i]++; 1904 MC_LOG("mc period %ld on " 1905 "/LSB%d/B%d\n", mcp->mc_period[i], 1906 mcp->mc_board_num, i); 1907 MAC_CLEAR_MAX(mcp, i); 1908 } 1909 1910 if (running) { 1911 /* 1912 * Mac patrol HW is still running. 1913 * Normally when an error is detected, 1914 * the HW patrol will stop so that we 1915 * can collect error data for reporting. 1916 * Certain errors (MI errors) detected may not 1917 * cause the HW patrol to stop which is a 1918 * problem since we cannot read error data while 1919 * the HW patrol is running. SW is not allowed 1920 * to stop the HW patrol while it is running 1921 * as it may cause HW inconsistency. This is 1922 * described in a HW errata. 1923 * In situations where we detected errors 1924 * that may not cause the HW patrol to stop. 1925 * We speed up the HW patrol scanning in 1926 * the hope that it will find the 'real' PTRL 1927 * errors associated with the previous errors 1928 * causing the HW to finally stop so that we 1929 * can do the reporting. 1930 */ 1931 /* 1932 * Check to see if we did speed up 1933 * the HW patrol due to previous errors 1934 * detected that did not cause the patrol 1935 * to stop. We only do it if HW patrol scan 1936 * wrapped (counted as completing a 'period'). 1937 */ 1938 if (mcp->mc_speedup_period[i] > 0) { 1939 if (wrapped && 1940 (--mcp->mc_speedup_period[i] == 0)) { 1941 /* 1942 * We did try to speed up. 1943 * The speed up period has expired 1944 * and the HW patrol is still running. 1945 * The errors must be intermittent. 1946 * We have no choice but to ignore 1947 * them, reset the scan speed to normal 1948 * and clear the MI error bits. 1949 */ 1950 MC_LOG("Clearing MI errors\n"); 1951 MAC_CLEAR_ERRS(mcp, i, 1952 MAC_CNTL_MI_ERRS); 1953 } 1954 } else if (stat & MAC_STAT_MI_ERRS) { 1955 /* 1956 * MI errors detected but we cannot 1957 * report them since the HW patrol 1958 * is still running. 1959 * We will attempt to speed up the 1960 * scanning and hopefully the HW 1961 * can detect PRTL errors at the same 1962 * location that cause the HW patrol 1963 * to stop. 1964 */ 1965 mcp->mc_speedup_period[i] = 2; 1966 MAC_CMD(mcp, i, 0); 1967 } 1968 } else if (stat & (MAC_STAT_PTRL_ERRS | 1969 MAC_STAT_MI_ERRS)) { 1970 /* 1971 * HW Patrol has stopped and we found errors. 1972 * Proceed to collect and report error info. 1973 */ 1974 mcp->mc_speedup_period[i] = 0; 1975 maddr_info.mi_valid = 0; 1976 maddr_info.mi_advance = 1; 1977 if (IS_MIRROR(mcp, i)) 1978 mc_error_handler_mir(mcp, i, &maddr_info); 1979 else 1980 mc_error_handler(mcp, i, &maddr_info); 1981 1982 error_count++; 1983 restart_patrol(mcp, i, &maddr_info); 1984 } else { 1985 /* 1986 * HW patrol scan has apparently stopped 1987 * but no errors detected/flagged. 1988 * Restart the HW patrol just to be sure. 1989 */ 1990 restart_patrol(mcp, i, NULL); 1991 } 1992 } 1993 } 1994 if (error_count > 0) 1995 mcp->mc_last_error += error_count; 1996 else 1997 mcp->mc_last_error = 0; 1998 } 1999 2000 /* 2001 * mc_polling -- Check errors for only one instance, 2002 * but process errors for all instances to make sure we drain the errors 2003 * faster than they can be accumulated. 2004 * 2005 * Polling on each board should be done only once per each 2006 * mc_patrol_interval_sec. This is equivalent to setting mc_tick_left 2007 * to OPL_MAX_BOARDS and decrement by 1 on each timeout. 2008 * Once mc_tick_left becomes negative, the board becomes a candidate 2009 * for polling because it has waited for at least 2010 * mc_patrol_interval_sec's long. If mc_timeout_period is calculated 2011 * differently, this has to beupdated accordingly. 2012 */ 2013 2014 static void 2015 mc_polling(void) 2016 { 2017 int i, scan_error; 2018 mc_opl_t *mcp; 2019 2020 2021 scan_error = 1; 2022 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2023 mutex_enter(&mcmutex); 2024 if ((mcp = mc_instances[i]) == NULL) { 2025 mutex_exit(&mcmutex); 2026 continue; 2027 } 2028 mutex_enter(&mcp->mc_lock); 2029 mutex_exit(&mcmutex); 2030 if (scan_error && mcp->mc_tick_left <= 0) { 2031 mc_check_errors_func((void *)mcp); 2032 mcp->mc_tick_left = OPL_MAX_BOARDS; 2033 scan_error = 0; 2034 } else { 2035 mcp->mc_tick_left--; 2036 } 2037 mc_process_scf_log(mcp); 2038 mutex_exit(&mcp->mc_lock); 2039 } 2040 } 2041 2042 static void 2043 get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr) 2044 { 2045 maddr->ma_bd = mcp->mc_board_num; 2046 maddr->ma_bank = bank; 2047 maddr->ma_dimm_addr = 0; 2048 } 2049 2050 typedef struct mc_mem_range { 2051 uint64_t addr; 2052 uint64_t size; 2053 } mc_mem_range_t; 2054 2055 static int 2056 get_base_address(mc_opl_t *mcp) 2057 { 2058 mc_mem_range_t *mem_range; 2059 int len; 2060 2061 if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2062 "sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) { 2063 return (DDI_FAILURE); 2064 } 2065 2066 mcp->mc_start_address = mem_range->addr; 2067 mcp->mc_size = mem_range->size; 2068 2069 kmem_free(mem_range, len); 2070 return (DDI_SUCCESS); 2071 } 2072 2073 struct mc_addr_spec { 2074 uint32_t bank; 2075 uint32_t phys_hi; 2076 uint32_t phys_lo; 2077 }; 2078 2079 #define REGS_PA(m, i) ((((uint64_t)m[i].phys_hi)<<32) | m[i].phys_lo) 2080 2081 static char *mc_tbl_name[] = { 2082 "cs0-mc-pa-trans-table", 2083 "cs1-mc-pa-trans-table" 2084 }; 2085 2086 static int 2087 mc_valid_pa(mc_opl_t *mcp, uint64_t pa) 2088 { 2089 struct memlist *ml; 2090 2091 if (mcp->mlist == NULL) 2092 mc_get_mlist(mcp); 2093 2094 for (ml = mcp->mlist; ml; ml = ml->next) { 2095 if (ml->address <= pa && pa < (ml->address + ml->size)) 2096 return (1); 2097 } 2098 return (0); 2099 } 2100 2101 static void 2102 mc_memlist_delete(struct memlist *mlist) 2103 { 2104 struct memlist *ml; 2105 2106 for (ml = mlist; ml; ml = mlist) { 2107 mlist = ml->next; 2108 kmem_free(ml, sizeof (struct memlist)); 2109 } 2110 } 2111 2112 static struct memlist * 2113 mc_memlist_dup(struct memlist *mlist) 2114 { 2115 struct memlist *hl = NULL, *tl, **mlp; 2116 2117 if (mlist == NULL) 2118 return (NULL); 2119 2120 mlp = &hl; 2121 tl = *mlp; 2122 for (; mlist; mlist = mlist->next) { 2123 *mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP); 2124 (*mlp)->address = mlist->address; 2125 (*mlp)->size = mlist->size; 2126 (*mlp)->prev = tl; 2127 tl = *mlp; 2128 mlp = &((*mlp)->next); 2129 } 2130 *mlp = NULL; 2131 2132 return (hl); 2133 } 2134 2135 2136 static struct memlist * 2137 mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len) 2138 { 2139 uint64_t end; 2140 struct memlist *ml, *tl, *nlp; 2141 2142 if (mlist == NULL) 2143 return (NULL); 2144 2145 end = base + len; 2146 if ((end <= mlist->address) || (base == end)) 2147 return (mlist); 2148 2149 for (tl = ml = mlist; ml; tl = ml, ml = nlp) { 2150 uint64_t mend; 2151 2152 nlp = ml->next; 2153 2154 if (end <= ml->address) 2155 break; 2156 2157 mend = ml->address + ml->size; 2158 if (base < mend) { 2159 if (base <= ml->address) { 2160 ml->address = end; 2161 if (end >= mend) 2162 ml->size = 0ull; 2163 else 2164 ml->size = mend - ml->address; 2165 } else { 2166 ml->size = base - ml->address; 2167 if (end < mend) { 2168 struct memlist *nl; 2169 /* 2170 * splitting an memlist entry. 2171 */ 2172 nl = kmem_alloc(sizeof (struct memlist), 2173 KM_SLEEP); 2174 nl->address = end; 2175 nl->size = mend - nl->address; 2176 if ((nl->next = nlp) != NULL) 2177 nlp->prev = nl; 2178 nl->prev = ml; 2179 ml->next = nl; 2180 nlp = nl; 2181 } 2182 } 2183 if (ml->size == 0ull) { 2184 if (ml == mlist) { 2185 if ((mlist = nlp) != NULL) 2186 nlp->prev = NULL; 2187 kmem_free(ml, sizeof (struct memlist)); 2188 if (mlist == NULL) 2189 break; 2190 ml = nlp; 2191 } else { 2192 if ((tl->next = nlp) != NULL) 2193 nlp->prev = tl; 2194 kmem_free(ml, sizeof (struct memlist)); 2195 ml = tl; 2196 } 2197 } 2198 } 2199 } 2200 2201 return (mlist); 2202 } 2203 2204 static void 2205 mc_get_mlist(mc_opl_t *mcp) 2206 { 2207 struct memlist *mlist; 2208 2209 memlist_read_lock(); 2210 mlist = mc_memlist_dup(phys_install); 2211 memlist_read_unlock(); 2212 2213 if (mlist) { 2214 mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address); 2215 } 2216 2217 if (mlist) { 2218 uint64_t startpa, endpa; 2219 2220 startpa = mcp->mc_start_address + mcp->mc_size; 2221 endpa = ptob(physmax + 1); 2222 if (endpa > startpa) { 2223 mlist = mc_memlist_del_span(mlist, 2224 startpa, endpa - startpa); 2225 } 2226 } 2227 2228 if (mlist) { 2229 mcp->mlist = mlist; 2230 } 2231 } 2232 2233 int 2234 mc_board_add(mc_opl_t *mcp) 2235 { 2236 struct mc_addr_spec *macaddr; 2237 cs_status_t *cs_status; 2238 int len, len1, i, bk, cc; 2239 mc_addr_info_t maddr; 2240 uint32_t mirr; 2241 int nbanks = 0; 2242 uint64_t nbytes = 0; 2243 2244 /* 2245 * Get configurations from "pseudo-mc" node which includes: 2246 * board# : LSB number 2247 * mac-addr : physical base address of MAC registers 2248 * csX-mac-pa-trans-table: translation table from DIMM address 2249 * to physical address or vice versa. 2250 */ 2251 mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip, 2252 DDI_PROP_DONTPASS, "board#", -1); 2253 2254 if (mcp->mc_board_num == -1) { 2255 return (DDI_FAILURE); 2256 } 2257 2258 /* 2259 * Get start address in this CAB. It can be gotten from 2260 * "sb-mem-ranges" property. 2261 */ 2262 2263 if (get_base_address(mcp) == DDI_FAILURE) { 2264 return (DDI_FAILURE); 2265 } 2266 /* get mac-pa trans tables */ 2267 for (i = 0; i < MC_TT_CS; i++) { 2268 len = MC_TT_ENTRIES; 2269 cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip, 2270 DDI_PROP_DONTPASS, mc_tbl_name[i], 2271 (caddr_t)mcp->mc_trans_table[i], &len); 2272 2273 if (cc != DDI_SUCCESS) { 2274 bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES); 2275 } 2276 } 2277 mcp->mlist = NULL; 2278 2279 mc_get_mlist(mcp); 2280 2281 /* initialize bank informations */ 2282 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2283 "mc-addr", (caddr_t)&macaddr, &len); 2284 if (cc != DDI_SUCCESS) { 2285 cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc); 2286 return (DDI_FAILURE); 2287 } 2288 2289 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2290 "cs-status", (caddr_t)&cs_status, &len1); 2291 2292 if (cc != DDI_SUCCESS) { 2293 if (len > 0) 2294 kmem_free(macaddr, len); 2295 cmn_err(CE_WARN, "Cannot get cs-status. err=%d\n", cc); 2296 return (DDI_FAILURE); 2297 } 2298 2299 mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL); 2300 2301 for (i = 0; i < len1 / sizeof (cs_status_t); i++) { 2302 nbytes += ((uint64_t)cs_status[i].cs_avail_hi << 32) | 2303 ((uint64_t)cs_status[i].cs_avail_low); 2304 } 2305 if (len1 > 0) 2306 kmem_free(cs_status, len1); 2307 nbanks = len / sizeof (struct mc_addr_spec); 2308 2309 if (nbanks > 0) 2310 nbytes /= nbanks; 2311 else { 2312 /* No need to free macaddr because len must be 0 */ 2313 mcp->mc_status |= MC_MEMORYLESS; 2314 return (DDI_SUCCESS); 2315 } 2316 2317 for (i = 0; i < BANKNUM_PER_SB; i++) { 2318 mcp->mc_scf_retry[i] = 0; 2319 mcp->mc_period[i] = 0; 2320 mcp->mc_speedup_period[i] = 0; 2321 } 2322 2323 /* 2324 * Get the memory size here. Let it be B (bytes). 2325 * Let T be the time in u.s. to scan 64 bytes. 2326 * If we want to complete 1 round of scanning in P seconds. 2327 * 2328 * B * T * 10^(-6) = P 2329 * --------------- 2330 * 64 2331 * 2332 * T = P * 64 * 10^6 2333 * ------------- 2334 * B 2335 * 2336 * = P * 64 * 10^6 2337 * ------------- 2338 * B 2339 * 2340 * The timing bits are set in PTRL_CNTL[28:26] where 2341 * 2342 * 0 - 1 m.s 2343 * 1 - 512 u.s. 2344 * 10 - 256 u.s. 2345 * 11 - 128 u.s. 2346 * 100 - 64 u.s. 2347 * 101 - 32 u.s. 2348 * 110 - 0 u.s. 2349 * 111 - reserved. 2350 * 2351 * 2352 * a[0] = 110, a[1] = 101, ... a[6] = 0 2353 * 2354 * cs-status property is int x 7 2355 * 0 - cs# 2356 * 1 - cs-status 2357 * 2 - cs-avail.hi 2358 * 3 - cs-avail.lo 2359 * 4 - dimm-capa.hi 2360 * 5 - dimm-capa.lo 2361 * 6 - #of dimms 2362 */ 2363 2364 if (nbytes > 0) { 2365 int i; 2366 uint64_t ms; 2367 ms = ((uint64_t)mc_scan_period * 64 * 1000000)/nbytes; 2368 mcp->mc_speed = mc_scan_speeds[MC_MAX_SPEEDS - 1].mc_speeds; 2369 for (i = 0; i < MC_MAX_SPEEDS - 1; i++) { 2370 if (ms < mc_scan_speeds[i + 1].mc_period) { 2371 mcp->mc_speed = mc_scan_speeds[i].mc_speeds; 2372 break; 2373 } 2374 } 2375 } else 2376 mcp->mc_speed = 0; 2377 2378 2379 for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) { 2380 struct mc_bank *bankp; 2381 uint32_t reg; 2382 2383 /* 2384 * setup bank 2385 */ 2386 bk = macaddr[i].bank; 2387 bankp = &(mcp->mc_bank[bk]); 2388 bankp->mcb_status = BANK_INSTALLED; 2389 bankp->mcb_reg_base = REGS_PA(macaddr, i); 2390 2391 reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk)); 2392 bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS); 2393 2394 /* 2395 * check if mirror mode 2396 */ 2397 mirr = LD_MAC_REG(MAC_MIRR(mcp, bk)); 2398 2399 if (mirr & MAC_MIRR_MIRROR_MODE) { 2400 MC_LOG("Mirror -> /LSB%d/B%d\n", 2401 mcp->mc_board_num, bk); 2402 bankp->mcb_status |= BANK_MIRROR_MODE; 2403 /* 2404 * The following bit is only used for 2405 * error injection. We should clear it 2406 */ 2407 if (mirr & MAC_MIRR_BANK_EXCLUSIVE) 2408 ST_MAC_REG(MAC_MIRR(mcp, bk), 2409 0); 2410 } 2411 2412 /* 2413 * restart if not mirror mode or the other bank 2414 * of the mirror is not running 2415 */ 2416 if (!(mirr & MAC_MIRR_MIRROR_MODE) || 2417 !(mcp->mc_bank[bk^1].mcb_status & 2418 BANK_PTRL_RUNNING)) { 2419 MC_LOG("Starting up /LSB%d/B%d\n", 2420 mcp->mc_board_num, bk); 2421 get_ptrl_start_address(mcp, bk, &maddr.mi_maddr); 2422 maddr.mi_maddr.ma_bd = mcp->mc_board_num; 2423 maddr.mi_maddr.ma_bank = bk; 2424 maddr.mi_maddr.ma_dimm_addr = 0; 2425 maddr.mi_valid = 0; 2426 maddr.mi_advance = 0; 2427 restart_patrol(mcp, bk, &maddr); 2428 } else { 2429 MC_LOG("Not starting up /LSB%d/B%d\n", 2430 mcp->mc_board_num, bk); 2431 } 2432 bankp->mcb_status |= BANK_PTRL_RUNNING; 2433 } 2434 if (len > 0) 2435 kmem_free(macaddr, len); 2436 2437 mcp->mc_dimm_list = mc_get_dimm_list(mcp); 2438 2439 /* 2440 * set interval in HZ. 2441 */ 2442 mcp->mc_last_error = 0; 2443 2444 /* restart memory patrol checking */ 2445 mcp->mc_status |= MC_POLL_RUNNING; 2446 2447 return (DDI_SUCCESS); 2448 } 2449 2450 int 2451 mc_board_del(mc_opl_t *mcp) 2452 { 2453 int i; 2454 scf_log_t *p; 2455 2456 /* 2457 * cleanup mac state 2458 */ 2459 mutex_enter(&mcp->mc_lock); 2460 if (mcp->mc_status & MC_MEMORYLESS) { 2461 mutex_exit(&mcp->mc_lock); 2462 mutex_destroy(&mcp->mc_lock); 2463 return (DDI_SUCCESS); 2464 } 2465 for (i = 0; i < BANKNUM_PER_SB; i++) { 2466 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2467 mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED; 2468 } 2469 } 2470 2471 /* stop memory patrol checking */ 2472 if (mcp->mc_status & MC_POLL_RUNNING) { 2473 mcp->mc_status &= ~MC_POLL_RUNNING; 2474 } 2475 2476 /* just throw away all the scf logs */ 2477 for (i = 0; i < BANKNUM_PER_SB; i++) { 2478 while ((p = mcp->mc_scf_log[i]) != NULL) { 2479 mcp->mc_scf_log[i] = p->sl_next; 2480 mcp->mc_scf_total[i]--; 2481 kmem_free(p, sizeof (scf_log_t)); 2482 } 2483 } 2484 2485 if (mcp->mlist) 2486 mc_memlist_delete(mcp->mlist); 2487 2488 if (mcp->mc_dimm_list) 2489 mc_free_dimm_list(mcp->mc_dimm_list); 2490 2491 mutex_exit(&mcp->mc_lock); 2492 2493 mutex_destroy(&mcp->mc_lock); 2494 return (DDI_SUCCESS); 2495 } 2496 2497 int 2498 mc_suspend(mc_opl_t *mcp, uint32_t flag) 2499 { 2500 /* stop memory patrol checking */ 2501 mutex_enter(&mcp->mc_lock); 2502 if (mcp->mc_status & MC_MEMORYLESS) { 2503 mutex_exit(&mcp->mc_lock); 2504 return (DDI_SUCCESS); 2505 } 2506 2507 if (mcp->mc_status & MC_POLL_RUNNING) { 2508 mcp->mc_status &= ~MC_POLL_RUNNING; 2509 } 2510 mcp->mc_status |= flag; 2511 mutex_exit(&mcp->mc_lock); 2512 2513 return (DDI_SUCCESS); 2514 } 2515 2516 /* caller must clear the SUSPEND bits or this will do nothing */ 2517 2518 int 2519 mc_resume(mc_opl_t *mcp, uint32_t flag) 2520 { 2521 int i; 2522 uint64_t basepa; 2523 2524 mutex_enter(&mcp->mc_lock); 2525 if (mcp->mc_status & MC_MEMORYLESS) { 2526 mutex_exit(&mcp->mc_lock); 2527 return (DDI_SUCCESS); 2528 } 2529 basepa = mcp->mc_start_address; 2530 if (get_base_address(mcp) == DDI_FAILURE) { 2531 mutex_exit(&mcp->mc_lock); 2532 return (DDI_FAILURE); 2533 } 2534 2535 if (basepa != mcp->mc_start_address) { 2536 if (mcp->mlist) 2537 mc_memlist_delete(mcp->mlist); 2538 mcp->mlist = NULL; 2539 mc_get_mlist(mcp); 2540 } 2541 2542 mcp->mc_status &= ~flag; 2543 2544 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 2545 mutex_exit(&mcp->mc_lock); 2546 return (DDI_SUCCESS); 2547 } 2548 2549 if (!(mcp->mc_status & MC_POLL_RUNNING)) { 2550 /* restart memory patrol checking */ 2551 mcp->mc_status |= MC_POLL_RUNNING; 2552 for (i = 0; i < BANKNUM_PER_SB; i++) { 2553 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2554 restart_patrol(mcp, i, NULL); 2555 } 2556 } 2557 } 2558 mutex_exit(&mcp->mc_lock); 2559 2560 return (DDI_SUCCESS); 2561 } 2562 2563 static mc_opl_t * 2564 mc_pa_to_mcp(uint64_t pa) 2565 { 2566 mc_opl_t *mcp; 2567 int i; 2568 2569 ASSERT(MUTEX_HELD(&mcmutex)); 2570 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2571 if ((mcp = mc_instances[i]) == NULL) 2572 continue; 2573 /* if mac patrol is suspended, we cannot rely on it */ 2574 if (!(mcp->mc_status & MC_POLL_RUNNING) || 2575 (mcp->mc_status & MC_SOFT_SUSPENDED)) 2576 continue; 2577 if ((mcp->mc_start_address <= pa) && 2578 (pa < (mcp->mc_start_address + mcp->mc_size))) { 2579 return (mcp); 2580 } 2581 } 2582 return (NULL); 2583 } 2584 2585 /* 2586 * Get Physical Board number from Logical one. 2587 */ 2588 static int 2589 mc_opl_get_physical_board(int sb) 2590 { 2591 if (&opl_get_physical_board) { 2592 return (opl_get_physical_board(sb)); 2593 } 2594 2595 cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n"); 2596 return (-1); 2597 } 2598 2599 /* ARGSUSED */ 2600 int 2601 mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen, 2602 int *lenp) 2603 { 2604 int i; 2605 int sb; 2606 int bank; 2607 mc_opl_t *mcp; 2608 char memb_num; 2609 2610 mutex_enter(&mcmutex); 2611 2612 if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) || 2613 (!pa_is_valid(mcp, flt_addr))) { 2614 mutex_exit(&mcmutex); 2615 if (snprintf(buf, buflen, "UNKNOWN") >= buflen) { 2616 return (ENOSPC); 2617 } else { 2618 if (lenp) 2619 *lenp = strlen(buf); 2620 } 2621 return (0); 2622 } 2623 2624 bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address); 2625 sb = mc_opl_get_physical_board(mcp->mc_board_num); 2626 2627 if (sb == -1) { 2628 mutex_exit(&mcmutex); 2629 return (ENXIO); 2630 } 2631 2632 if (plat_model == MODEL_DC) { 2633 i = BD_BK_SLOT_TO_INDEX(0, bank, 0); 2634 snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s MEM%s MEM%s", 2635 model_names[plat_model].unit_name, sb, 2636 mc_dc_dimm_unum_table[i], mc_dc_dimm_unum_table[i + 1], 2637 mc_dc_dimm_unum_table[i + 2], mc_dc_dimm_unum_table[i + 3]); 2638 } else { 2639 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 2640 memb_num = mc_ff_dimm_unum_table[i][0]; 2641 snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s MEM%s MEM%s", 2642 model_names[plat_model].unit_name, 2643 model_names[plat_model].mem_name, memb_num, 2644 &mc_ff_dimm_unum_table[i][1], 2645 2646 &mc_ff_dimm_unum_table[i + 1][1], 2647 &mc_ff_dimm_unum_table[i + 2][1], 2648 &mc_ff_dimm_unum_table[i + 3][1]); 2649 } 2650 if (lenp) { 2651 *lenp = strlen(buf); 2652 } 2653 mutex_exit(&mcmutex); 2654 return (0); 2655 } 2656 2657 int 2658 opl_mc_suspend(void) 2659 { 2660 mc_opl_t *mcp; 2661 int i; 2662 2663 mutex_enter(&mcmutex); 2664 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2665 if ((mcp = mc_instances[i]) == NULL) 2666 continue; 2667 mc_suspend(mcp, MC_SOFT_SUSPENDED); 2668 } 2669 mutex_exit(&mcmutex); 2670 2671 return (0); 2672 } 2673 2674 int 2675 opl_mc_resume(void) 2676 { 2677 mc_opl_t *mcp; 2678 int i; 2679 2680 mutex_enter(&mcmutex); 2681 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2682 if ((mcp = mc_instances[i]) == NULL) 2683 continue; 2684 mc_resume(mcp, MC_SOFT_SUSPENDED); 2685 } 2686 mutex_exit(&mcmutex); 2687 2688 return (0); 2689 } 2690 static void 2691 insert_mcp(mc_opl_t *mcp) 2692 { 2693 mutex_enter(&mcmutex); 2694 if (mc_instances[mcp->mc_board_num] != NULL) { 2695 MC_LOG("mc-opl instance for board# %d already exists\n", 2696 mcp->mc_board_num); 2697 } 2698 mc_instances[mcp->mc_board_num] = mcp; 2699 mutex_exit(&mcmutex); 2700 } 2701 2702 static void 2703 delete_mcp(mc_opl_t *mcp) 2704 { 2705 mutex_enter(&mcmutex); 2706 mc_instances[mcp->mc_board_num] = 0; 2707 mutex_exit(&mcmutex); 2708 } 2709 2710 /* Error injection interface */ 2711 2712 static void 2713 mc_lock_va(uint64_t pa, caddr_t new_va) 2714 { 2715 tte_t tte; 2716 2717 vtag_flushpage(new_va, KCONTEXT); 2718 sfmmu_memtte(&tte, pa >> PAGESHIFT, 2719 PROC_DATA|HAT_NOSYNC, TTE8K); 2720 tte.tte_intlo |= TTE_LCK_INT; 2721 sfmmu_dtlb_ld_kva(new_va, &tte); 2722 } 2723 2724 static void 2725 mc_unlock_va(caddr_t va) 2726 { 2727 vtag_flushpage(va, (uint64_t)ksfmmup); 2728 } 2729 2730 /* ARGSUSED */ 2731 int 2732 mc_inject_error(int error_type, uint64_t pa, uint32_t flags) 2733 { 2734 mc_opl_t *mcp; 2735 int bank; 2736 uint32_t dimm_addr; 2737 uint32_t cntl; 2738 mc_addr_info_t maddr; 2739 uint32_t data, stat; 2740 int both_sides = 0; 2741 uint64_t pa0; 2742 int extra_injection_needed = 0; 2743 extern void cpu_flush_ecache(void); 2744 2745 MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags); 2746 2747 mutex_enter(&mcmutex); 2748 if ((mcp = mc_pa_to_mcp(pa)) == NULL) { 2749 mutex_exit(&mcmutex); 2750 MC_LOG("mc_inject_error: invalid pa\n"); 2751 return (ENOTSUP); 2752 } 2753 2754 mutex_enter(&mcp->mc_lock); 2755 mutex_exit(&mcmutex); 2756 2757 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 2758 mutex_exit(&mcp->mc_lock); 2759 MC_LOG("mc-opl has been suspended. No error injection.\n"); 2760 return (EBUSY); 2761 } 2762 2763 /* convert pa to offset within the board */ 2764 MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address); 2765 2766 if (!pa_is_valid(mcp, pa)) { 2767 mutex_exit(&mcp->mc_lock); 2768 return (EINVAL); 2769 } 2770 2771 pa0 = pa - mcp->mc_start_address; 2772 2773 bank = pa_to_bank(mcp, pa0); 2774 2775 if (flags & MC_INJECT_FLAG_OTHER) 2776 bank = bank ^ 1; 2777 2778 if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) { 2779 mutex_exit(&mcp->mc_lock); 2780 MC_LOG("Not mirror mode\n"); 2781 return (EINVAL); 2782 } 2783 2784 dimm_addr = pa_to_dimm(mcp, pa0); 2785 2786 MC_LOG("injecting error to /LSB%d/B%d/%x\n", 2787 mcp->mc_board_num, bank, dimm_addr); 2788 2789 2790 switch (error_type) { 2791 case MC_INJECT_INTERMITTENT_MCE: 2792 case MC_INJECT_PERMANENT_MCE: 2793 case MC_INJECT_MUE: 2794 both_sides = 1; 2795 } 2796 2797 if (flags & MC_INJECT_FLAG_RESET) 2798 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0); 2799 2800 ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK); 2801 2802 if (both_sides) { 2803 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0); 2804 ST_MAC_REG(MAC_EG_ADD(mcp, bank^1), 2805 dimm_addr & MAC_EG_ADD_MASK); 2806 } 2807 2808 switch (error_type) { 2809 case MC_INJECT_SUE: 2810 extra_injection_needed = 1; 2811 /*FALLTHROUGH*/ 2812 case MC_INJECT_UE: 2813 case MC_INJECT_MUE: 2814 if (flags & MC_INJECT_FLAG_PATH) { 2815 cntl = MAC_EG_ADD_FIX 2816 |MAC_EG_FORCE_READ00|MAC_EG_FORCE_READ16 2817 |MAC_EG_RDERR_ONCE; 2818 } else { 2819 cntl = MAC_EG_ADD_FIX|MAC_EG_FORCE_DERR00 2820 |MAC_EG_FORCE_DERR16|MAC_EG_DERR_ONCE; 2821 } 2822 flags |= MC_INJECT_FLAG_ST; 2823 break; 2824 case MC_INJECT_INTERMITTENT_CE: 2825 case MC_INJECT_INTERMITTENT_MCE: 2826 if (flags & MC_INJECT_FLAG_PATH) { 2827 cntl = MAC_EG_ADD_FIX 2828 |MAC_EG_FORCE_READ00 2829 |MAC_EG_RDERR_ONCE; 2830 } else { 2831 cntl = MAC_EG_ADD_FIX 2832 |MAC_EG_FORCE_DERR16 2833 |MAC_EG_DERR_ONCE; 2834 } 2835 extra_injection_needed = 1; 2836 flags |= MC_INJECT_FLAG_ST; 2837 break; 2838 case MC_INJECT_PERMANENT_CE: 2839 case MC_INJECT_PERMANENT_MCE: 2840 if (flags & MC_INJECT_FLAG_PATH) { 2841 cntl = MAC_EG_ADD_FIX 2842 |MAC_EG_FORCE_READ00 2843 |MAC_EG_RDERR_ALWAYS; 2844 } else { 2845 cntl = MAC_EG_ADD_FIX 2846 |MAC_EG_FORCE_DERR16 2847 |MAC_EG_DERR_ALWAYS; 2848 } 2849 flags |= MC_INJECT_FLAG_ST; 2850 break; 2851 case MC_INJECT_CMPE: 2852 data = 0xabcdefab; 2853 stphys(pa, data); 2854 cpu_flush_ecache(); 2855 MC_LOG("CMPE: writing data %x to %lx\n", data, pa); 2856 ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE); 2857 stphys(pa, data ^ 0xffffffff); 2858 cpu_flush_ecache(); 2859 ST_MAC_REG(MAC_MIRR(mcp, bank), 0); 2860 MC_LOG("CMPE: write new data %xto %lx\n", data, pa); 2861 cntl = 0; 2862 break; 2863 case MC_INJECT_NOP: 2864 cntl = 0; 2865 break; 2866 default: 2867 MC_LOG("mc_inject_error: invalid option\n"); 2868 cntl = 0; 2869 } 2870 2871 if (cntl) { 2872 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK); 2873 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 2874 2875 if (both_sides) { 2876 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 2877 MAC_EG_SETUP_MASK); 2878 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 2879 } 2880 } 2881 2882 /* 2883 * For all injection cases except compare error, we 2884 * must write to the PA to trigger the error. 2885 */ 2886 2887 if (flags & MC_INJECT_FLAG_ST) { 2888 data = 0xf0e0d0c0; 2889 MC_LOG("Writing %x to %lx\n", data, pa); 2890 stphys(pa, data); 2891 cpu_flush_ecache(); 2892 } 2893 2894 2895 if (flags & MC_INJECT_FLAG_LD) { 2896 if (flags & MC_INJECT_FLAG_PREFETCH) { 2897 /* 2898 * Use strong prefetch operation to 2899 * inject MI errors. 2900 */ 2901 page_t *pp; 2902 extern void mc_prefetch(caddr_t); 2903 2904 MC_LOG("prefetch\n"); 2905 2906 pp = page_numtopp_nolock(pa >> PAGESHIFT); 2907 if (pp != NULL) { 2908 caddr_t va, va1; 2909 2910 va = ppmapin(pp, PROT_READ|PROT_WRITE, 2911 (caddr_t)-1); 2912 kpreempt_disable(); 2913 mc_lock_va((uint64_t)pa, va); 2914 va1 = va + (pa & (PAGESIZE - 1)); 2915 mc_prefetch(va1); 2916 mc_unlock_va(va); 2917 kpreempt_enable(); 2918 ppmapout(va); 2919 2920 /* 2921 * For MI errors, we need one extra 2922 * injection for HW patrol to stop. 2923 */ 2924 extra_injection_needed = 1; 2925 } else { 2926 cmn_err(CE_WARN, "Cannot find page structure" 2927 " for PA %lx\n", pa); 2928 } 2929 } else { 2930 MC_LOG("Reading from %lx\n", pa); 2931 data = ldphys(pa); 2932 MC_LOG("data = %x\n", data); 2933 } 2934 2935 if (extra_injection_needed) { 2936 /* 2937 * These are the injection cases where the 2938 * requested injected errors will not cause the HW 2939 * patrol to stop. For these cases, we need to inject 2940 * an extra 'real' PTRL error to force the 2941 * HW patrol to stop so that we can report the 2942 * errors injected. Note that we cannot read 2943 * and report error status while the HW patrol 2944 * is running. 2945 */ 2946 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 2947 cntl & MAC_EG_SETUP_MASK); 2948 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 2949 2950 if (both_sides) { 2951 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 2952 MAC_EG_SETUP_MASK); 2953 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 2954 } 2955 data = 0xf0e0d0c0; 2956 MC_LOG("Writing %x to %lx\n", data, pa); 2957 stphys(pa, data); 2958 cpu_flush_ecache(); 2959 } 2960 } 2961 2962 if (flags & MC_INJECT_FLAG_RESTART) { 2963 MC_LOG("Restart patrol\n"); 2964 maddr.mi_maddr.ma_bd = mcp->mc_board_num; 2965 maddr.mi_maddr.ma_bank = bank; 2966 maddr.mi_maddr.ma_dimm_addr = dimm_addr; 2967 maddr.mi_valid = 1; 2968 maddr.mi_advance = 0; 2969 restart_patrol(mcp, bank, &maddr); 2970 } 2971 2972 if (flags & MC_INJECT_FLAG_POLL) { 2973 int running; 2974 2975 MC_LOG("Poll patrol error\n"); 2976 stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank)); 2977 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 2978 running = cntl & MAC_CNTL_PTRL_START; 2979 2980 if (!running && 2981 (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS))) { 2982 /* 2983 * HW patrol stopped and we have errors to 2984 * report. Do it. 2985 */ 2986 mcp->mc_speedup_period[bank] = 0; 2987 maddr.mi_valid = 0; 2988 maddr.mi_advance = 1; 2989 if (IS_MIRROR(mcp, bank)) 2990 mc_error_handler_mir(mcp, bank, &maddr); 2991 else 2992 mc_error_handler(mcp, bank, &maddr); 2993 2994 restart_patrol(mcp, bank, &maddr); 2995 } else { 2996 /* 2997 * We are expecting to report injected 2998 * errors but the HW patrol is still running. 2999 * Speed up the scanning 3000 */ 3001 mcp->mc_speedup_period[bank] = 2; 3002 MAC_CMD(mcp, bank, 0); 3003 restart_patrol(mcp, bank, NULL); 3004 } 3005 } 3006 3007 mutex_exit(&mcp->mc_lock); 3008 return (0); 3009 } 3010 3011 void 3012 mc_stphysio(uint64_t pa, uint32_t data) 3013 { 3014 MC_LOG("0x%x -> pa(%lx)\n", data, pa); 3015 stphysio(pa, data); 3016 3017 /* force the above write to be processed by mac patrol */ 3018 data = ldphysio(pa); 3019 MC_LOG("pa(%lx) = 0x%x\n", pa, data); 3020 } 3021 3022 uint32_t 3023 mc_ldphysio(uint64_t pa) 3024 { 3025 uint32_t rv; 3026 3027 rv = ldphysio(pa); 3028 MC_LOG("pa(%lx) = 0x%x\n", pa, rv); 3029 return (rv); 3030 } 3031 3032 #define isdigit(ch) ((ch) >= '0' && (ch) <= '9') 3033 3034 /* 3035 * parse_unum_memory -- extract the board number and the DIMM name from 3036 * the unum. 3037 * 3038 * Return 0 for success and non-zero for a failure. 3039 */ 3040 int 3041 parse_unum_memory(char *unum, int *board, char *dname) 3042 { 3043 char *c; 3044 char x, y, z; 3045 3046 if ((c = strstr(unum, "CMU")) != NULL) { 3047 /* DC Model */ 3048 c += 3; 3049 *board = (uint8_t)stoi(&c); 3050 if ((c = strstr(c, "MEM")) == NULL) { 3051 return (1); 3052 } 3053 c += 3; 3054 if (strlen(c) < 3) { 3055 return (2); 3056 } 3057 if ((!isdigit(c[0])) || (!(isdigit(c[1]))) || 3058 ((c[2] != 'A') && (c[2] != 'B'))) { 3059 return (3); 3060 } 3061 x = c[0]; 3062 y = c[1]; 3063 z = c[2]; 3064 } else if ((c = strstr(unum, "MBU_")) != NULL) { 3065 /* FF1/FF2 Model */ 3066 c += 4; 3067 if ((c[0] != 'A') && (c[0] != 'B')) { 3068 return (4); 3069 } 3070 if ((c = strstr(c, "MEMB")) == NULL) { 3071 return (5); 3072 } 3073 c += 4; 3074 3075 x = c[0]; 3076 *board = ((uint8_t)stoi(&c)) / 4; 3077 if ((c = strstr(c, "MEM")) == NULL) { 3078 return (6); 3079 } 3080 c += 3; 3081 if (strlen(c) < 2) { 3082 return (7); 3083 } 3084 if ((!isdigit(c[0])) || ((c[1] != 'A') && (c[1] != 'B'))) { 3085 return (8); 3086 } 3087 y = c[0]; 3088 z = c[1]; 3089 } else { 3090 return (9); 3091 } 3092 if (*board < 0) { 3093 return (10); 3094 } 3095 dname[0] = x; 3096 dname[1] = y; 3097 dname[2] = z; 3098 dname[3] = '\0'; 3099 return (0); 3100 } 3101 3102 /* 3103 * mc_get_mem_sid_dimm -- Get the serial-ID for a given board and 3104 * the DIMM name. 3105 */ 3106 int 3107 mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf, 3108 int buflen, int *lenp) 3109 { 3110 int ret = ENODEV; 3111 mc_dimm_info_t *d = NULL; 3112 3113 if ((d = mcp->mc_dimm_list) == NULL) 3114 return (ENOTSUP); 3115 3116 for (; d != NULL; d = d->md_next) { 3117 if (strcmp(d->md_dimmname, dname) == 0) { 3118 break; 3119 } 3120 } 3121 if (d != NULL) { 3122 *lenp = strlen(d->md_serial) + strlen(d->md_partnum); 3123 if (buflen <= *lenp) { 3124 cmn_err(CE_WARN, "mc_get_mem_sid_dimm: " 3125 "buflen is smaller than %d\n", *lenp); 3126 ret = ENOSPC; 3127 } else { 3128 snprintf(buf, buflen, "%s:%s", 3129 d->md_serial, d->md_partnum); 3130 ret = 0; 3131 } 3132 } 3133 MC_LOG("mc_get_mem_sid_dimm: Ret=%d Name=%s Serial-ID=%s\n", 3134 ret, dname, (ret == 0) ? buf : ""); 3135 return (ret); 3136 } 3137 3138 int 3139 mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, 3140 int bank, uint32_t mf_type, uint32_t d_slot) 3141 { 3142 int sb; 3143 int lenp = buflen; 3144 int id; 3145 int ret; 3146 char *dimmnm; 3147 3148 if ((sb = mc_opl_get_physical_board(lsb)) < 0) { 3149 return (ENODEV); 3150 } 3151 3152 if (mf_type == FLT_TYPE_PERMANENT_CE) { 3153 if (plat_model == MODEL_DC) { 3154 id = BD_BK_SLOT_TO_INDEX(0, bank, d_slot); 3155 } else { 3156 id = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 3157 } 3158 dimmnm = mc_dc_dimm_unum_table[id]; 3159 if ((ret = mc_get_mem_sid_dimm(mcp, dimmnm, buf, buflen, 3160 &lenp)) != 0) { 3161 return (ret); 3162 } 3163 } else { 3164 return (1); 3165 } 3166 3167 return (0); 3168 } 3169 3170 /* 3171 * mc_get_mem_sid -- get the DIMM serial-ID corresponding to the unum. 3172 */ 3173 int 3174 mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 3175 { 3176 int i; 3177 int ret = ENODEV; 3178 int board; 3179 char dname[MCOPL_MAX_DIMMNAME + 1]; 3180 mc_opl_t *mcp; 3181 3182 MC_LOG("mc_get_mem_sid: unum=%s buflen=%d\n", unum, buflen); 3183 if ((ret = parse_unum_memory(unum, &board, dname)) != 0) { 3184 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3185 unum, ret); 3186 return (EINVAL); 3187 } 3188 3189 if (board < 0) { 3190 MC_LOG("mc_get_mem_sid: Invalid board=%d dimm=%s\n", 3191 board, dname); 3192 return (EINVAL); 3193 } 3194 3195 mutex_enter(&mcmutex); 3196 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3197 if ((mcp = mc_instances[i]) == NULL) 3198 continue; 3199 mutex_enter(&mcp->mc_lock); 3200 if (mcp->mc_board_num == board) { 3201 ret = mc_get_mem_sid_dimm(mcp, dname, buf, 3202 buflen, lenp); 3203 mutex_exit(&mcp->mc_lock); 3204 break; 3205 } 3206 mutex_exit(&mcp->mc_lock); 3207 } 3208 mutex_exit(&mcmutex); 3209 return (ret); 3210 } 3211 3212 /* 3213 * mc_get_mem_offset -- get the offset in a DIMM for a given physical address. 3214 */ 3215 int 3216 mc_get_mem_offset(uint64_t paddr, uint64_t *offp) 3217 { 3218 int i; 3219 int ret = ENODEV; 3220 mc_addr_t maddr; 3221 mc_opl_t *mcp; 3222 3223 mutex_enter(&mcmutex); 3224 for (i = 0; ((i < OPL_MAX_BOARDS) && (ret != 0)); i++) { 3225 if ((mcp = mc_instances[i]) == NULL) 3226 continue; 3227 mutex_enter(&mcp->mc_lock); 3228 if (!pa_is_valid(mcp, paddr)) { 3229 mutex_exit(&mcp->mc_lock); 3230 continue; 3231 } 3232 if (pa_to_maddr(mcp, paddr, &maddr) == 0) { 3233 *offp = maddr.ma_dimm_addr; 3234 ret = 0; 3235 } 3236 mutex_exit(&mcp->mc_lock); 3237 } 3238 mutex_exit(&mcmutex); 3239 MC_LOG("mc_get_mem_offset: Ret=%d paddr=0x%lx offset=0x%lx\n", 3240 ret, paddr, *offp); 3241 return (ret); 3242 } 3243 3244 /* 3245 * dname_to_bankslot - Get the bank and slot number from the DIMM name. 3246 */ 3247 int 3248 dname_to_bankslot(char *dname, int *bank, int *slot) 3249 { 3250 int i; 3251 int tsz; 3252 char **tbl; 3253 3254 if (plat_model == MODEL_DC) { /* DC */ 3255 tbl = mc_dc_dimm_unum_table; 3256 tsz = OPL_MAX_DIMMS; 3257 } else { 3258 tbl = mc_ff_dimm_unum_table; 3259 tsz = 2 * OPL_MAX_DIMMS; 3260 } 3261 3262 for (i = 0; i < tsz; i++) { 3263 if (strcmp(dname, tbl[i]) == 0) { 3264 break; 3265 } 3266 } 3267 if (i == tsz) { 3268 return (1); 3269 } 3270 *bank = INDEX_TO_BANK(i); 3271 *slot = INDEX_TO_SLOT(i); 3272 return (0); 3273 } 3274 3275 /* 3276 * mc_get_mem_addr -- get the physical address of a DIMM corresponding 3277 * to the unum and sid. 3278 */ 3279 int 3280 mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr) 3281 { 3282 int board; 3283 int bank; 3284 int slot; 3285 int i; 3286 int ret = ENODEV; 3287 char dname[MCOPL_MAX_DIMMNAME + 1]; 3288 mc_addr_t maddr; 3289 mc_opl_t *mcp; 3290 3291 MC_LOG("mc_get_mem_addr: unum=%s sid=%s offset=0x%lx\n", 3292 unum, sid, offset); 3293 if (parse_unum_memory(unum, &board, dname) != 0) { 3294 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3295 unum, ret); 3296 return (EINVAL); 3297 } 3298 3299 if (board < 0) { 3300 MC_LOG("mc_get_mem_addr: Invalid board=%d dimm=%s\n", 3301 board, dname); 3302 return (EINVAL); 3303 } 3304 3305 mutex_enter(&mcmutex); 3306 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3307 if ((mcp = mc_instances[i]) == NULL) 3308 continue; 3309 mutex_enter(&mcp->mc_lock); 3310 if (mcp->mc_board_num != board) { 3311 mutex_exit(&mcp->mc_lock); 3312 continue; 3313 } 3314 3315 ret = dname_to_bankslot(dname, &bank, &slot); 3316 MC_LOG("mc_get_mem_addr: bank=%d slot=%d\n", bank, slot); 3317 if (ret != 0) { 3318 MC_LOG("mc_get_mem_addr: dname_to_bankslot failed\n"); 3319 ret = ENODEV; 3320 } else { 3321 maddr.ma_bd = board; 3322 maddr.ma_bank = bank; 3323 maddr.ma_dimm_addr = offset; 3324 ret = mcaddr_to_pa(mcp, &maddr, paddr); 3325 if (ret != 0) { 3326 MC_LOG("mc_get_mem_addr: " 3327 "mcaddr_to_pa failed\n"); 3328 ret = ENODEV; 3329 } 3330 } 3331 mutex_exit(&mcp->mc_lock); 3332 } 3333 mutex_exit(&mcmutex); 3334 MC_LOG("mc_get_mem_addr: Ret=%d, Paddr=0x%lx\n", ret, *paddr); 3335 return (ret); 3336 } 3337 3338 static void 3339 mc_free_dimm_list(mc_dimm_info_t *d) 3340 { 3341 mc_dimm_info_t *next; 3342 3343 while (d != NULL) { 3344 next = d->md_next; 3345 kmem_free(d, sizeof (mc_dimm_info_t)); 3346 d = next; 3347 } 3348 } 3349 3350 /* 3351 * mc_get_dimm_list -- get the list of dimms with serial-id info 3352 * from the SP. 3353 */ 3354 mc_dimm_info_t * 3355 mc_get_dimm_list(mc_opl_t *mcp) 3356 { 3357 uint32_t bufsz; 3358 uint32_t maxbufsz; 3359 int ret; 3360 int sexp; 3361 board_dimm_info_t *bd_dimmp; 3362 mc_dimm_info_t *dimm_list = NULL; 3363 3364 maxbufsz = bufsz = sizeof (board_dimm_info_t) + 3365 ((MCOPL_MAX_DIMMNAME + MCOPL_MAX_SERIAL + 3366 MCOPL_MAX_PARTNUM) * OPL_MAX_DIMMS); 3367 3368 bd_dimmp = (board_dimm_info_t *)kmem_alloc(bufsz, KM_SLEEP); 3369 ret = scf_get_dimminfo(mcp->mc_board_num, (void *)bd_dimmp, &bufsz); 3370 3371 MC_LOG("mc_get_dimm_list: scf_service_getinfo returned=%d\n", ret); 3372 if (ret == 0) { 3373 sexp = sizeof (board_dimm_info_t) + 3374 ((bd_dimmp->bd_dnamesz + bd_dimmp->bd_serialsz + 3375 bd_dimmp->bd_partnumsz) * bd_dimmp->bd_numdimms); 3376 3377 if ((bd_dimmp->bd_version == OPL_DIMM_INFO_VERSION) && 3378 (bd_dimmp->bd_dnamesz <= MCOPL_MAX_DIMMNAME) && 3379 (bd_dimmp->bd_serialsz <= MCOPL_MAX_SERIAL) && 3380 (bd_dimmp->bd_partnumsz <= MCOPL_MAX_PARTNUM) && 3381 (sexp <= bufsz)) { 3382 3383 #ifdef DEBUG 3384 if (oplmc_debug) 3385 mc_dump_dimm_info(bd_dimmp); 3386 #endif 3387 dimm_list = mc_prepare_dimmlist(bd_dimmp); 3388 3389 } else { 3390 cmn_err(CE_WARN, "DIMM info version mismatch\n"); 3391 } 3392 } 3393 kmem_free(bd_dimmp, maxbufsz); 3394 MC_LOG("mc_get_dimm_list: dimmlist=0x%p\n", dimm_list); 3395 return (dimm_list); 3396 } 3397 3398 /* 3399 * mc_prepare_dimmlist - Prepare the dimm list from the infomation 3400 * recieved from the SP. 3401 */ 3402 mc_dimm_info_t * 3403 mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp) 3404 { 3405 char *dimm_name; 3406 char *serial; 3407 char *part; 3408 int dimm; 3409 int dnamesz = bd_dimmp->bd_dnamesz; 3410 int sersz = bd_dimmp->bd_serialsz; 3411 int partsz = bd_dimmp->bd_partnumsz; 3412 mc_dimm_info_t *dimm_list = NULL; 3413 mc_dimm_info_t *d; 3414 3415 dimm_name = (char *)(bd_dimmp + 1); 3416 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3417 3418 d = (mc_dimm_info_t *)kmem_alloc(sizeof (mc_dimm_info_t), 3419 KM_SLEEP); 3420 snprintf(d->md_dimmname, dnamesz + 1, "%s", dimm_name); 3421 serial = dimm_name + dnamesz; 3422 snprintf(d->md_serial, sersz + 1, "%s", serial); 3423 part = serial + sersz; 3424 snprintf(d->md_partnum, partsz + 1, "%s", part); 3425 3426 d->md_next = dimm_list; 3427 dimm_list = d; 3428 dimm_name = part + partsz; 3429 } 3430 return (dimm_list); 3431 } 3432 3433 #ifdef DEBUG 3434 void 3435 mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz) 3436 { 3437 char dname[MCOPL_MAX_DIMMNAME + 1]; 3438 char serial[MCOPL_MAX_SERIAL + 1]; 3439 char part[ MCOPL_MAX_PARTNUM + 1]; 3440 char *b; 3441 3442 b = buf; 3443 snprintf(dname, dnamesz + 1, "%s", b); 3444 b += dnamesz; 3445 snprintf(serial, serialsz + 1, "%s", b); 3446 b += serialsz; 3447 snprintf(part, partnumsz + 1, "%s", b); 3448 printf("DIMM=%s Serial=%s PartNum=%s\n", dname, serial, part); 3449 } 3450 3451 void 3452 mc_dump_dimm_info(board_dimm_info_t *bd_dimmp) 3453 { 3454 int dimm; 3455 int dnamesz = bd_dimmp->bd_dnamesz; 3456 int sersz = bd_dimmp->bd_serialsz; 3457 int partsz = bd_dimmp->bd_partnumsz; 3458 char *buf; 3459 3460 printf("Version=%d Board=%02d DIMMs=%d NameSize=%d " 3461 "SerialSize=%d PartnumSize=%d\n", bd_dimmp->bd_version, 3462 bd_dimmp->bd_boardnum, bd_dimmp->bd_numdimms, bd_dimmp->bd_dnamesz, 3463 bd_dimmp->bd_serialsz, bd_dimmp->bd_partnumsz); 3464 printf("======================================================\n"); 3465 3466 buf = (char *)(bd_dimmp + 1); 3467 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3468 mc_dump_dimm(buf, dnamesz, sersz, partsz); 3469 buf += dnamesz + sersz + partsz; 3470 } 3471 printf("======================================================\n"); 3472 } 3473 3474 3475 /* ARGSUSED */ 3476 static int 3477 mc_ioctl_debug(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 3478 int *rvalp) 3479 { 3480 caddr_t buf; 3481 uint64_t pa; 3482 int rv = 0; 3483 int i; 3484 uint32_t flags; 3485 static uint32_t offset = 0; 3486 3487 3488 flags = (cmd >> 4) & 0xfffffff; 3489 3490 cmd &= 0xf; 3491 3492 MC_LOG("mc_ioctl(cmd = %x, flags = %x)\n", cmd, flags); 3493 3494 if (arg != NULL) { 3495 if (ddi_copyin((const void *)arg, (void *)&pa, 3496 sizeof (uint64_t), 0) < 0) { 3497 rv = EFAULT; 3498 return (rv); 3499 } 3500 buf = NULL; 3501 } else { 3502 buf = (caddr_t)kmem_alloc(PAGESIZE, KM_SLEEP); 3503 3504 pa = va_to_pa(buf); 3505 pa += offset; 3506 3507 offset += 64; 3508 if (offset >= PAGESIZE) 3509 offset = 0; 3510 } 3511 3512 switch (cmd) { 3513 case MCI_CE: 3514 mc_inject_error(MC_INJECT_INTERMITTENT_CE, pa, 3515 flags); 3516 break; 3517 case MCI_PERM_CE: 3518 mc_inject_error(MC_INJECT_PERMANENT_CE, pa, 3519 flags); 3520 break; 3521 case MCI_UE: 3522 mc_inject_error(MC_INJECT_UE, pa, 3523 flags); 3524 break; 3525 case MCI_M_CE: 3526 mc_inject_error(MC_INJECT_INTERMITTENT_MCE, pa, 3527 flags); 3528 break; 3529 case MCI_M_PCE: 3530 mc_inject_error(MC_INJECT_PERMANENT_MCE, pa, 3531 flags); 3532 break; 3533 case MCI_M_UE: 3534 mc_inject_error(MC_INJECT_MUE, pa, 3535 flags); 3536 break; 3537 case MCI_CMP: 3538 mc_inject_error(MC_INJECT_CMPE, pa, 3539 flags); 3540 break; 3541 case MCI_NOP: 3542 mc_inject_error(MC_INJECT_NOP, pa, flags); 3543 break; 3544 case MCI_SHOW_ALL: 3545 mc_debug_show_all = 1; 3546 break; 3547 case MCI_SHOW_NONE: 3548 mc_debug_show_all = 0; 3549 break; 3550 case MCI_ALLOC: 3551 /* 3552 * just allocate some kernel memory and never free it 3553 * 512 MB seems to be the maximum size supported. 3554 */ 3555 cmn_err(CE_NOTE, "Allocating kmem %d MB\n", flags * 512); 3556 for (i = 0; i < flags; i++) { 3557 buf = kmem_alloc(512 * 1024 * 1024, KM_SLEEP); 3558 cmn_err(CE_NOTE, "kmem buf %llx PA %llx\n", 3559 (u_longlong_t)buf, (u_longlong_t)va_to_pa(buf)); 3560 } 3561 break; 3562 case MCI_SUSPEND: 3563 (void) opl_mc_suspend(); 3564 break; 3565 case MCI_RESUME: 3566 (void) opl_mc_resume(); 3567 break; 3568 default: 3569 rv = ENXIO; 3570 } 3571 return (rv); 3572 } 3573 3574 #endif /* DEBUG */ 3575