1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2008 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/conf.h> 34 #include <sys/modctl.h> 35 #include <sys/stat.h> 36 #include <sys/async.h> 37 #include <sys/machcpuvar.h> 38 #include <sys/machsystm.h> 39 #include <sys/promif.h> 40 #include <sys/ksynch.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/sunndi.h> 44 #include <sys/ddifm.h> 45 #include <sys/fm/protocol.h> 46 #include <sys/fm/util.h> 47 #include <sys/kmem.h> 48 #include <sys/fm/io/opl_mc_fm.h> 49 #include <sys/memlist.h> 50 #include <sys/param.h> 51 #include <sys/disp.h> 52 #include <vm/page.h> 53 #include <sys/mc-opl.h> 54 #include <sys/opl.h> 55 #include <sys/opl_dimm.h> 56 #include <sys/scfd/scfostoescf.h> 57 #include <sys/cpu_module.h> 58 #include <vm/seg_kmem.h> 59 #include <sys/vmem.h> 60 #include <vm/hat_sfmmu.h> 61 #include <sys/vmsystm.h> 62 #include <sys/membar.h> 63 64 /* 65 * Function prototypes 66 */ 67 static int mc_open(dev_t *, int, int, cred_t *); 68 static int mc_close(dev_t, int, int, cred_t *); 69 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 70 static int mc_attach(dev_info_t *, ddi_attach_cmd_t); 71 static int mc_detach(dev_info_t *, ddi_detach_cmd_t); 72 73 static int mc_poll_init(void); 74 static void mc_poll_fini(void); 75 static int mc_board_add(mc_opl_t *mcp); 76 static int mc_board_del(mc_opl_t *mcp); 77 static int mc_suspend(mc_opl_t *mcp, uint32_t flag); 78 static int mc_resume(mc_opl_t *mcp, uint32_t flag); 79 int opl_mc_suspend(void); 80 int opl_mc_resume(void); 81 82 static void insert_mcp(mc_opl_t *mcp); 83 static void delete_mcp(mc_opl_t *mcp); 84 85 static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr); 86 87 static int mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa); 88 89 int mc_get_mem_unum(int, uint64_t, char *, int, int *); 90 int mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr); 91 int mc_get_mem_offset(uint64_t paddr, uint64_t *offp); 92 int mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp); 93 int mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf, 94 int buflen, int *lenp); 95 mc_dimm_info_t *mc_get_dimm_list(mc_opl_t *mcp); 96 mc_dimm_info_t *mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp); 97 int mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, int bank, 98 uint32_t mf_type, uint32_t d_slot); 99 static void mc_free_dimm_list(mc_dimm_info_t *d); 100 static void mc_get_mlist(mc_opl_t *); 101 static void mc_polling(void); 102 static int mc_opl_get_physical_board(int); 103 104 static void mc_clear_rewrite(mc_opl_t *mcp, int i); 105 static void mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state); 106 107 #ifdef DEBUG 108 static int mc_ioctl_debug(dev_t, int, intptr_t, int, cred_t *, int *); 109 void mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz); 110 void mc_dump_dimm_info(board_dimm_info_t *bd_dimmp); 111 #endif 112 113 #pragma weak opl_get_physical_board 114 extern int opl_get_physical_board(int); 115 extern int plat_max_boards(void); 116 117 /* 118 * Configuration data structures 119 */ 120 static struct cb_ops mc_cb_ops = { 121 mc_open, /* open */ 122 mc_close, /* close */ 123 nulldev, /* strategy */ 124 nulldev, /* print */ 125 nodev, /* dump */ 126 nulldev, /* read */ 127 nulldev, /* write */ 128 mc_ioctl, /* ioctl */ 129 nodev, /* devmap */ 130 nodev, /* mmap */ 131 nodev, /* segmap */ 132 nochpoll, /* poll */ 133 ddi_prop_op, /* cb_prop_op */ 134 0, /* streamtab */ 135 D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flag */ 136 CB_REV, /* rev */ 137 nodev, /* cb_aread */ 138 nodev /* cb_awrite */ 139 }; 140 141 static struct dev_ops mc_ops = { 142 DEVO_REV, /* rev */ 143 0, /* refcnt */ 144 ddi_getinfo_1to1, /* getinfo */ 145 nulldev, /* identify */ 146 nulldev, /* probe */ 147 mc_attach, /* attach */ 148 mc_detach, /* detach */ 149 nulldev, /* reset */ 150 &mc_cb_ops, /* cb_ops */ 151 (struct bus_ops *)0, /* bus_ops */ 152 nulldev /* power */ 153 }; 154 155 /* 156 * Driver globals 157 */ 158 159 static enum { 160 MODEL_FF1, 161 MODEL_FF2, 162 MODEL_DC, 163 MODEL_IKKAKU 164 } plat_model = MODEL_DC; /* The default behaviour is DC */ 165 166 static struct plat_model_names { 167 const char *unit_name; 168 const char *mem_name; 169 } model_names[] = { 170 { "MBU_A", "MEMB" }, 171 { "MBU_B", "MEMB" }, 172 { "CMU", "" }, 173 { "MBU_A", "" } 174 }; 175 176 /* 177 * The DIMM Names for DC platform. 178 * The index into this table is made up of (bank, dslot), 179 * Where dslot occupies bits 0-1 and bank occupies 2-4. 180 */ 181 static char *mc_dc_dimm_unum_table[OPL_MAX_DIMMS] = { 182 /* --------CMUnn----------- */ 183 /* --CS0-----|--CS1------ */ 184 /* -H-|--L-- | -H- | -L-- */ 185 "03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */ 186 "13A", "12A", "13B", "12B", /* Bank 1 (MAC 0 bank 1) */ 187 "23A", "22A", "23B", "22B", /* Bank 2 (MAC 1 bank 0) */ 188 "33A", "32A", "33B", "32B", /* Bank 3 (MAC 1 bank 1) */ 189 "01A", "00A", "01B", "00B", /* Bank 4 (MAC 2 bank 0) */ 190 "11A", "10A", "11B", "10B", /* Bank 5 (MAC 2 bank 1) */ 191 "21A", "20A", "21B", "20B", /* Bank 6 (MAC 3 bank 0) */ 192 "31A", "30A", "31B", "30B" /* Bank 7 (MAC 3 bank 1) */ 193 }; 194 195 /* 196 * The DIMM Names for FF1/FF2/IKKAKU platforms. 197 * The index into this table is made up of (board, bank, dslot), 198 * Where dslot occupies bits 0-1, bank occupies 2-4 and 199 * board occupies the bit 5. 200 */ 201 static char *mc_ff_dimm_unum_table[2 * OPL_MAX_DIMMS] = { 202 /* --------CMU0---------- */ 203 /* --CS0-----|--CS1------ */ 204 /* -H-|--L-- | -H- | -L-- */ 205 "03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */ 206 "01A", "00A", "01B", "00B", /* Bank 1 (MAC 0 bank 1) */ 207 "13A", "12A", "13B", "12B", /* Bank 2 (MAC 1 bank 0) */ 208 "11A", "10A", "11B", "10B", /* Bank 3 (MAC 1 bank 1) */ 209 "23A", "22A", "23B", "22B", /* Bank 4 (MAC 2 bank 0) */ 210 "21A", "20A", "21B", "20B", /* Bank 5 (MAC 2 bank 1) */ 211 "33A", "32A", "33B", "32B", /* Bank 6 (MAC 3 bank 0) */ 212 "31A", "30A", "31B", "30B", /* Bank 7 (MAC 3 bank 1) */ 213 /* --------CMU1---------- */ 214 /* --CS0-----|--CS1------ */ 215 /* -H-|--L-- | -H- | -L-- */ 216 "43A", "42A", "43B", "42B", /* Bank 0 (MAC 0 bank 0) */ 217 "41A", "40A", "41B", "40B", /* Bank 1 (MAC 0 bank 1) */ 218 "53A", "52A", "53B", "52B", /* Bank 2 (MAC 1 bank 0) */ 219 "51A", "50A", "51B", "50B", /* Bank 3 (MAC 1 bank 1) */ 220 "63A", "62A", "63B", "62B", /* Bank 4 (MAC 2 bank 0) */ 221 "61A", "60A", "61B", "60B", /* Bank 5 (MAC 2 bank 1) */ 222 "73A", "72A", "73B", "72B", /* Bank 6 (MAC 3 bank 0) */ 223 "71A", "70A", "71B", "70B" /* Bank 7 (MAC 3 bank 1) */ 224 }; 225 226 #define BD_BK_SLOT_TO_INDEX(bd, bk, s) \ 227 (((bd & 0x01) << 5) | ((bk & 0x07) << 2) | (s & 0x03)) 228 229 #define INDEX_TO_BANK(i) (((i) & 0x1C) >> 2) 230 #define INDEX_TO_SLOT(i) ((i) & 0x03) 231 232 #define SLOT_TO_CS(slot) ((slot & 0x3) >> 1) 233 234 /* Isolation unit size is 64 MB */ 235 #define MC_ISOLATION_BSIZE (64 * 1024 * 1024) 236 237 #define MC_MAX_SPEEDS 7 238 239 typedef struct { 240 uint32_t mc_speeds; 241 uint32_t mc_period; 242 } mc_scan_speed_t; 243 244 #define MC_CNTL_SPEED_SHIFT 26 245 246 /* 247 * In mirror mode, we normalized the bank idx to "even" since 248 * the HW treats them as one unit w.r.t programming. 249 * This bank index will be the "effective" bank index. 250 * All mirrored bank state info on mc_period, mc_speedup_period 251 * will be stored in the even bank structure to avoid code duplication. 252 */ 253 #define MIRROR_IDX(bankidx) (bankidx & ~1) 254 255 static mc_scan_speed_t mc_scan_speeds[MC_MAX_SPEEDS] = { 256 {0x6 << MC_CNTL_SPEED_SHIFT, 0}, 257 {0x5 << MC_CNTL_SPEED_SHIFT, 32}, 258 {0x4 << MC_CNTL_SPEED_SHIFT, 64}, 259 {0x3 << MC_CNTL_SPEED_SHIFT, 128}, 260 {0x2 << MC_CNTL_SPEED_SHIFT, 256}, 261 {0x1 << MC_CNTL_SPEED_SHIFT, 512}, 262 {0x0 << MC_CNTL_SPEED_SHIFT, 1024} 263 }; 264 265 static uint32_t mc_max_speed = (0x6 << 26); 266 267 int mc_isolation_bsize = MC_ISOLATION_BSIZE; 268 int mc_patrol_interval_sec = MC_PATROL_INTERVAL_SEC; 269 int mc_max_scf_retry = 16; 270 int mc_max_scf_logs = 64; 271 int mc_max_errlog_processed = BANKNUM_PER_SB*2; 272 int mc_scan_period = 12 * 60 * 60; /* 12 hours period */ 273 int mc_max_rewrite_loop = 100; 274 int mc_rewrite_delay = 10; 275 /* 276 * it takes SCF about 300 m.s. to process a requst. We can bail out 277 * if it is busy. It does not pay to wait for it too long. 278 */ 279 int mc_max_scf_loop = 2; 280 int mc_scf_delay = 100; 281 int mc_pce_dropped = 0; 282 int mc_poll_priority = MINCLSYSPRI; 283 int mc_max_rewrite_retry = 6 * 60; 284 285 286 /* 287 * Mutex hierarchy in mc-opl 288 * If both mcmutex and mc_lock must be held, 289 * mcmutex must be acquired first, and then mc_lock. 290 */ 291 292 static kmutex_t mcmutex; 293 mc_opl_t *mc_instances[OPL_MAX_BOARDS]; 294 295 static kmutex_t mc_polling_lock; 296 static kcondvar_t mc_polling_cv; 297 static kcondvar_t mc_poll_exit_cv; 298 static int mc_poll_cmd = 0; 299 static int mc_pollthr_running = 0; 300 int mc_timeout_period = 0; /* this is in m.s. */ 301 void *mc_statep; 302 303 #ifdef DEBUG 304 int oplmc_debug = 0; 305 #endif 306 307 static int mc_debug_show_all = 0; 308 309 extern struct mod_ops mod_driverops; 310 311 static struct modldrv modldrv = { 312 &mod_driverops, /* module type, this one is a driver */ 313 "OPL Memory-controller %I%", /* module name */ 314 &mc_ops, /* driver ops */ 315 }; 316 317 static struct modlinkage modlinkage = { 318 MODREV_1, /* rev */ 319 (void *)&modldrv, 320 NULL 321 }; 322 323 #pragma weak opl_get_mem_unum 324 #pragma weak opl_get_mem_sid 325 #pragma weak opl_get_mem_offset 326 #pragma weak opl_get_mem_addr 327 328 extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *); 329 extern int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp); 330 extern int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp); 331 extern int (*opl_get_mem_addr)(char *unum, char *sid, uint64_t offset, 332 uint64_t *paddr); 333 334 335 /* 336 * pseudo-mc node portid format 337 * 338 * [10] = 0 339 * [9] = 1 340 * [8] = LSB_ID[4] = 0 341 * [7:4] = LSB_ID[3:0] 342 * [3:0] = 0 343 * 344 */ 345 346 /* 347 * These are the module initialization routines. 348 */ 349 int 350 _init(void) 351 { 352 int error; 353 int plen; 354 char model[20]; 355 pnode_t node; 356 357 358 if ((error = ddi_soft_state_init(&mc_statep, 359 sizeof (mc_opl_t), 1)) != 0) 360 return (error); 361 362 if ((error = mc_poll_init()) != 0) { 363 ddi_soft_state_fini(&mc_statep); 364 return (error); 365 } 366 367 mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL); 368 if (&opl_get_mem_unum) 369 opl_get_mem_unum = mc_get_mem_unum; 370 if (&opl_get_mem_sid) 371 opl_get_mem_sid = mc_get_mem_sid; 372 if (&opl_get_mem_offset) 373 opl_get_mem_offset = mc_get_mem_offset; 374 if (&opl_get_mem_addr) 375 opl_get_mem_addr = mc_get_mem_addr; 376 377 node = prom_rootnode(); 378 plen = prom_getproplen(node, "model"); 379 380 if (plen > 0 && plen < sizeof (model)) { 381 (void) prom_getprop(node, "model", model); 382 model[plen] = '\0'; 383 if (strcmp(model, "FF1") == 0) 384 plat_model = MODEL_FF1; 385 else if (strcmp(model, "FF2") == 0) 386 plat_model = MODEL_FF2; 387 else if (strncmp(model, "DC", 2) == 0) 388 plat_model = MODEL_DC; 389 else if (strcmp(model, "IKKAKU") == 0) 390 plat_model = MODEL_IKKAKU; 391 } 392 393 error = mod_install(&modlinkage); 394 if (error != 0) { 395 if (&opl_get_mem_unum) 396 opl_get_mem_unum = NULL; 397 if (&opl_get_mem_sid) 398 opl_get_mem_sid = NULL; 399 if (&opl_get_mem_offset) 400 opl_get_mem_offset = NULL; 401 if (&opl_get_mem_addr) 402 opl_get_mem_addr = NULL; 403 mutex_destroy(&mcmutex); 404 mc_poll_fini(); 405 ddi_soft_state_fini(&mc_statep); 406 } 407 return (error); 408 } 409 410 int 411 _fini(void) 412 { 413 int error; 414 415 if ((error = mod_remove(&modlinkage)) != 0) 416 return (error); 417 418 if (&opl_get_mem_unum) 419 opl_get_mem_unum = NULL; 420 if (&opl_get_mem_sid) 421 opl_get_mem_sid = NULL; 422 if (&opl_get_mem_offset) 423 opl_get_mem_offset = NULL; 424 if (&opl_get_mem_addr) 425 opl_get_mem_addr = NULL; 426 427 mutex_destroy(&mcmutex); 428 mc_poll_fini(); 429 ddi_soft_state_fini(&mc_statep); 430 431 return (0); 432 } 433 434 int 435 _info(struct modinfo *modinfop) 436 { 437 return (mod_info(&modlinkage, modinfop)); 438 } 439 440 static void 441 mc_polling_thread() 442 { 443 mutex_enter(&mc_polling_lock); 444 mc_pollthr_running = 1; 445 while (!(mc_poll_cmd & MC_POLL_EXIT)) { 446 mc_polling(); 447 cv_timedwait(&mc_polling_cv, &mc_polling_lock, 448 ddi_get_lbolt() + mc_timeout_period); 449 } 450 mc_pollthr_running = 0; 451 452 /* 453 * signal if any one is waiting for this thread to exit. 454 */ 455 cv_signal(&mc_poll_exit_cv); 456 mutex_exit(&mc_polling_lock); 457 thread_exit(); 458 /* NOTREACHED */ 459 } 460 461 static int 462 mc_poll_init() 463 { 464 mutex_init(&mc_polling_lock, NULL, MUTEX_DRIVER, NULL); 465 cv_init(&mc_polling_cv, NULL, CV_DRIVER, NULL); 466 cv_init(&mc_poll_exit_cv, NULL, CV_DRIVER, NULL); 467 return (0); 468 } 469 470 static void 471 mc_poll_fini() 472 { 473 mutex_enter(&mc_polling_lock); 474 if (mc_pollthr_running) { 475 mc_poll_cmd = MC_POLL_EXIT; 476 cv_signal(&mc_polling_cv); 477 while (mc_pollthr_running) { 478 cv_wait(&mc_poll_exit_cv, &mc_polling_lock); 479 } 480 } 481 mutex_exit(&mc_polling_lock); 482 mutex_destroy(&mc_polling_lock); 483 cv_destroy(&mc_polling_cv); 484 cv_destroy(&mc_poll_exit_cv); 485 } 486 487 static int 488 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 489 { 490 mc_opl_t *mcp; 491 int instance; 492 int rv; 493 494 /* get the instance of this devi */ 495 instance = ddi_get_instance(devi); 496 497 switch (cmd) { 498 case DDI_ATTACH: 499 break; 500 case DDI_RESUME: 501 mcp = ddi_get_soft_state(mc_statep, instance); 502 rv = mc_resume(mcp, MC_DRIVER_SUSPENDED); 503 return (rv); 504 default: 505 return (DDI_FAILURE); 506 } 507 508 if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS) 509 return (DDI_FAILURE); 510 511 if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) { 512 goto bad; 513 } 514 515 if (mc_timeout_period == 0) { 516 mc_patrol_interval_sec = (int)ddi_getprop(DDI_DEV_T_ANY, devi, 517 DDI_PROP_DONTPASS, "mc-timeout-interval-sec", 518 mc_patrol_interval_sec); 519 mc_timeout_period = drv_usectohz(1000000 * 520 mc_patrol_interval_sec / OPL_MAX_BOARDS); 521 } 522 523 /* set informations in mc state */ 524 mcp->mc_dip = devi; 525 526 if (mc_board_add(mcp)) 527 goto bad; 528 529 insert_mcp(mcp); 530 531 /* 532 * Start the polling thread if it is not running already. 533 */ 534 mutex_enter(&mc_polling_lock); 535 if (!mc_pollthr_running) { 536 (void) thread_create(NULL, 0, (void (*)())mc_polling_thread, 537 NULL, 0, &p0, TS_RUN, mc_poll_priority); 538 } 539 mutex_exit(&mc_polling_lock); 540 ddi_report_dev(devi); 541 542 return (DDI_SUCCESS); 543 544 bad: 545 ddi_soft_state_free(mc_statep, instance); 546 return (DDI_FAILURE); 547 } 548 549 /* ARGSUSED */ 550 static int 551 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 552 { 553 int rv; 554 int instance; 555 mc_opl_t *mcp; 556 557 /* get the instance of this devi */ 558 instance = ddi_get_instance(devi); 559 if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) { 560 return (DDI_FAILURE); 561 } 562 563 switch (cmd) { 564 case DDI_SUSPEND: 565 rv = mc_suspend(mcp, MC_DRIVER_SUSPENDED); 566 return (rv); 567 case DDI_DETACH: 568 break; 569 default: 570 return (DDI_FAILURE); 571 } 572 573 delete_mcp(mcp); 574 if (mc_board_del(mcp) != DDI_SUCCESS) { 575 return (DDI_FAILURE); 576 } 577 578 /* free up the soft state */ 579 ddi_soft_state_free(mc_statep, instance); 580 581 return (DDI_SUCCESS); 582 } 583 584 /* ARGSUSED */ 585 static int 586 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp) 587 { 588 return (0); 589 } 590 591 /* ARGSUSED */ 592 static int 593 mc_close(dev_t devp, int flag, int otyp, cred_t *credp) 594 { 595 return (0); 596 } 597 598 /* ARGSUSED */ 599 static int 600 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 601 int *rvalp) 602 { 603 #ifdef DEBUG 604 return (mc_ioctl_debug(dev, cmd, arg, mode, credp, rvalp)); 605 #else 606 return (ENXIO); 607 #endif 608 } 609 610 /* 611 * PA validity check: 612 * This function return 1 if the PA is a valid PA 613 * in the running Solaris instance i.e. in physinstall 614 * Otherwise, return 0. 615 */ 616 617 /* ARGSUSED */ 618 static int 619 pa_is_valid(mc_opl_t *mcp, uint64_t addr) 620 { 621 if (mcp->mlist == NULL) 622 mc_get_mlist(mcp); 623 624 if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) { 625 return (1); 626 } 627 return (0); 628 } 629 630 /* 631 * mac-pa translation routines. 632 * 633 * Input: mc driver state, (LSB#, Bank#, DIMM address) 634 * Output: physical address 635 * 636 * Valid - return value: 0 637 * Invalid - return value: -1 638 */ 639 static int 640 mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa) 641 { 642 int i; 643 uint64_t pa_offset = 0; 644 int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1; 645 int bank = maddr->ma_bank; 646 mc_addr_t maddr1; 647 int bank0, bank1; 648 649 MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank, 650 maddr->ma_dimm_addr); 651 652 /* loc validity check */ 653 ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd); 654 ASSERT(bank >= 0 && OPL_BANK_MAX > bank); 655 656 /* Do translation */ 657 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 658 int pa_bit = 0; 659 int mc_bit = mcp->mc_trans_table[cs][i]; 660 if (mc_bit < MC_ADDRESS_BITS) { 661 pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1; 662 } else if (mc_bit == MP_NONE) { 663 pa_bit = 0; 664 } else if (mc_bit == MP_BANK_0) { 665 pa_bit = bank & 1; 666 } else if (mc_bit == MP_BANK_1) { 667 pa_bit = (bank >> 1) & 1; 668 } else if (mc_bit == MP_BANK_2) { 669 pa_bit = (bank >> 2) & 1; 670 } 671 pa_offset |= ((uint64_t)pa_bit) << i; 672 } 673 *pa = mcp->mc_start_address + pa_offset; 674 MC_LOG("pa = %lx\n", *pa); 675 676 if (pa_to_maddr(mcp, *pa, &maddr1) == -1) { 677 cmn_err(CE_WARN, "mcaddr_to_pa: /LSB%d/B%d/%x failed to " 678 "convert PA %lx\n", maddr->ma_bd, bank, 679 maddr->ma_dimm_addr, *pa); 680 return (-1); 681 } 682 683 /* 684 * In mirror mode, PA is always translated to the even bank. 685 */ 686 if (IS_MIRROR(mcp, maddr->ma_bank)) { 687 bank0 = maddr->ma_bank & ~(1); 688 bank1 = maddr1.ma_bank & ~(1); 689 } else { 690 bank0 = maddr->ma_bank; 691 bank1 = maddr1.ma_bank; 692 } 693 /* 694 * there is no need to check ma_bd because it is generated from 695 * mcp. They are the same. 696 */ 697 if ((bank0 == bank1) && (maddr->ma_dimm_addr == 698 maddr1.ma_dimm_addr)) { 699 return (0); 700 } else { 701 cmn_err(CE_WARN, "Translation error source /LSB%d/B%d/%x, " 702 "PA %lx, target /LSB%d/B%d/%x\n", maddr->ma_bd, bank, 703 maddr->ma_dimm_addr, *pa, maddr1.ma_bd, maddr1.ma_bank, 704 maddr1.ma_dimm_addr); 705 return (-1); 706 } 707 } 708 709 /* 710 * PA to CS (used by pa_to_maddr). 711 */ 712 static int 713 pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset) 714 { 715 int i; 716 int cs = 1; 717 718 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 719 /* MAC address bit<29> is arranged on the same PA bit */ 720 /* on both table. So we may use any table. */ 721 if (mcp->mc_trans_table[0][i] == CS_SHIFT) { 722 cs = (pa_offset >> i) & 1; 723 break; 724 } 725 } 726 return (cs); 727 } 728 729 /* 730 * PA to DIMM (used by pa_to_maddr). 731 */ 732 /* ARGSUSED */ 733 static uint32_t 734 pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset) 735 { 736 int i; 737 int cs = pa_to_cs(mcp, pa_offset); 738 uint32_t dimm_addr = 0; 739 740 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 741 int pa_bit_value = (pa_offset >> i) & 1; 742 int mc_bit = mcp->mc_trans_table[cs][i]; 743 if (mc_bit < MC_ADDRESS_BITS) { 744 dimm_addr |= pa_bit_value << mc_bit; 745 } 746 } 747 dimm_addr |= cs << CS_SHIFT; 748 return (dimm_addr); 749 } 750 751 /* 752 * PA to Bank (used by pa_to_maddr). 753 */ 754 static int 755 pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset) 756 { 757 int i; 758 int cs = pa_to_cs(mcp, pa_offset); 759 int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT]; 760 761 762 for (i = 0; i < PA_BITS_FOR_MAC; i++) { 763 int pa_bit_value = (pa_offset >> i) & 1; 764 int mc_bit = mcp->mc_trans_table[cs][i]; 765 switch (mc_bit) { 766 case MP_BANK_0: 767 bankno |= pa_bit_value; 768 break; 769 case MP_BANK_1: 770 bankno |= pa_bit_value << 1; 771 break; 772 case MP_BANK_2: 773 bankno |= pa_bit_value << 2; 774 break; 775 } 776 } 777 778 return (bankno); 779 } 780 781 /* 782 * PA to MAC address translation 783 * 784 * Input: MAC driver state, physicall adress 785 * Output: LSB#, Bank id, mac address 786 * 787 * Valid - return value: 0 788 * Invalid - return value: -1 789 */ 790 791 int 792 pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr) 793 { 794 uint64_t pa_offset; 795 796 if (!mc_rangecheck_pa(mcp, pa)) 797 return (-1); 798 799 /* Do translation */ 800 pa_offset = pa - mcp->mc_start_address; 801 802 maddr->ma_bd = mcp->mc_board_num; 803 maddr->ma_phys_bd = mcp->mc_phys_board_num; 804 maddr->ma_bank = pa_to_bank(mcp, pa_offset); 805 maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset); 806 MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n", pa_offset, maddr->ma_bd, 807 maddr->ma_bank, maddr->ma_dimm_addr); 808 return (0); 809 } 810 811 /* 812 * UNUM format for DC is "/CMUnn/MEMxyZ", where 813 * nn = 00..03 for DC1 and 00..07 for DC2 and 00..15 for DC3. 814 * x = MAC 0..3 815 * y = 0..3 (slot info). 816 * Z = 'A' or 'B' 817 * 818 * UNUM format for FF1 is "/MBU_A/MEMBx/MEMyZ", where 819 * x = 0..3 (MEMB number) 820 * y = 0..3 (slot info). 821 * Z = 'A' or 'B' 822 * 823 * UNUM format for FF2 is "/MBU_B/MEMBx/MEMyZ", where 824 * x = 0..7 (MEMB number) 825 * y = 0..3 (slot info). 826 * Z = 'A' or 'B' 827 * 828 * UNUM format for IKKAKU is "/MBU_A/MEMyZ", where 829 * y = 0..3 (slot info). 830 * Z = 'A' or 'B' 831 * 832 */ 833 int 834 mc_set_mem_unum(char *buf, int buflen, int sb, int bank, 835 uint32_t mf_type, uint32_t d_slot) 836 { 837 char *dimmnm; 838 char memb_num; 839 int cs; 840 int i; 841 int j; 842 843 cs = SLOT_TO_CS(d_slot); 844 845 switch (plat_model) { 846 case MODEL_DC: 847 if (mf_type == FLT_TYPE_INTERMITTENT_CE || 848 mf_type == FLT_TYPE_PERMANENT_CE) { 849 i = BD_BK_SLOT_TO_INDEX(0, bank, d_slot); 850 dimmnm = mc_dc_dimm_unum_table[i]; 851 snprintf(buf, buflen, "/%s%02d/MEM%s", 852 model_names[plat_model].unit_name, sb, dimmnm); 853 } else { 854 i = BD_BK_SLOT_TO_INDEX(0, bank, 0); 855 j = (cs == 0) ? i : i + 2; 856 snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s", 857 model_names[plat_model].unit_name, sb, 858 mc_dc_dimm_unum_table[j], 859 mc_dc_dimm_unum_table[j + 1]); 860 } 861 break; 862 case MODEL_FF1: 863 case MODEL_FF2: 864 if (mf_type == FLT_TYPE_INTERMITTENT_CE || 865 mf_type == FLT_TYPE_PERMANENT_CE) { 866 i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 867 dimmnm = mc_ff_dimm_unum_table[i]; 868 memb_num = dimmnm[0]; 869 snprintf(buf, buflen, "/%s/%s%c/MEM%s", 870 model_names[plat_model].unit_name, 871 model_names[plat_model].mem_name, 872 memb_num, &dimmnm[1]); 873 } else { 874 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 875 j = (cs == 0) ? i : i + 2; 876 memb_num = mc_ff_dimm_unum_table[i][0], 877 snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s", 878 model_names[plat_model].unit_name, 879 model_names[plat_model].mem_name, memb_num, 880 &mc_ff_dimm_unum_table[j][1], 881 &mc_ff_dimm_unum_table[j + 1][1]); 882 } 883 break; 884 case MODEL_IKKAKU: 885 if (mf_type == FLT_TYPE_INTERMITTENT_CE || 886 mf_type == FLT_TYPE_PERMANENT_CE) { 887 i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 888 dimmnm = mc_ff_dimm_unum_table[i]; 889 snprintf(buf, buflen, "/%s/MEM%s", 890 model_names[plat_model].unit_name, &dimmnm[1]); 891 } else { 892 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 893 j = (cs == 0) ? i : i + 2; 894 memb_num = mc_ff_dimm_unum_table[i][0], 895 snprintf(buf, buflen, "/%s/MEM%s MEM%s", 896 model_names[plat_model].unit_name, 897 &mc_ff_dimm_unum_table[j][1], 898 &mc_ff_dimm_unum_table[j + 1][1]); 899 } 900 break; 901 default: 902 return (-1); 903 } 904 return (0); 905 } 906 907 static void 908 mc_ereport_post(mc_aflt_t *mc_aflt) 909 { 910 char buf[FM_MAX_CLASS]; 911 char device_path[MAXPATHLEN]; 912 char sid[MAXPATHLEN]; 913 nv_alloc_t *nva = NULL; 914 nvlist_t *ereport, *detector, *resource; 915 errorq_elem_t *eqep; 916 int nflts; 917 mc_flt_stat_t *flt_stat; 918 int i, n; 919 int blen = MAXPATHLEN; 920 char *p, *s = NULL; 921 uint32_t values[2], synd[2], dslot[2]; 922 uint64_t offset = (uint64_t)-1; 923 int ret = -1; 924 925 if (panicstr) { 926 eqep = errorq_reserve(ereport_errorq); 927 if (eqep == NULL) 928 return; 929 ereport = errorq_elem_nvl(ereport_errorq, eqep); 930 nva = errorq_elem_nva(ereport_errorq, eqep); 931 } else { 932 ereport = fm_nvlist_create(nva); 933 } 934 935 /* 936 * Create the scheme "dev" FMRI. 937 */ 938 detector = fm_nvlist_create(nva); 939 resource = fm_nvlist_create(nva); 940 941 nflts = mc_aflt->mflt_nflts; 942 943 ASSERT(nflts >= 1 && nflts <= 2); 944 945 flt_stat = mc_aflt->mflt_stat[0]; 946 (void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path); 947 (void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL, 948 device_path, NULL); 949 950 /* 951 * Encode all the common data into the ereport. 952 */ 953 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s", MC_OPL_ERROR_CLASS, 954 mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS : MC_OPL_MI_SUBCLASS, 955 mc_aflt->mflt_erpt_class); 956 957 MC_LOG("mc_ereport_post: ereport %s\n", buf); 958 959 960 fm_ereport_set(ereport, FM_EREPORT_VERSION, buf, 961 fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1), detector, NULL); 962 963 /* 964 * Set payload. 965 */ 966 fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32, 967 flt_stat->mf_flt_maddr.ma_bd, NULL); 968 969 fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64, 970 flt_stat->mf_flt_paddr, NULL); 971 972 if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE || 973 flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 974 fm_payload_set(ereport, MC_OPL_FLT_TYPE, DATA_TYPE_UINT8, 975 ECC_STICKY, NULL); 976 } 977 978 for (i = 0; i < nflts; i++) 979 values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank; 980 981 fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY, nflts, 982 values, NULL); 983 984 for (i = 0; i < nflts; i++) 985 values[i] = mc_aflt->mflt_stat[i]->mf_cntl; 986 987 fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY, nflts, 988 values, NULL); 989 990 for (i = 0; i < nflts; i++) 991 values[i] = mc_aflt->mflt_stat[i]->mf_err_add; 992 993 /* offset is set only for PCE and ICE */ 994 if (mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_INTERMITTENT_CE || 995 mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_PERMANENT_CE) { 996 offset = values[0]; 997 998 } 999 fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY, nflts, 1000 values, NULL); 1001 1002 for (i = 0; i < nflts; i++) 1003 values[i] = mc_aflt->mflt_stat[i]->mf_err_log; 1004 1005 fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY, nflts, 1006 values, NULL); 1007 1008 for (i = 0; i < nflts; i++) { 1009 flt_stat = mc_aflt->mflt_stat[i]; 1010 if (flt_stat->mf_errlog_valid) { 1011 synd[i] = flt_stat->mf_synd; 1012 dslot[i] = flt_stat->mf_dimm_slot; 1013 values[i] = flt_stat->mf_dram_place; 1014 } else { 1015 synd[i] = 0; 1016 dslot[i] = 0; 1017 values[i] = 0; 1018 } 1019 } 1020 1021 fm_payload_set(ereport, MC_OPL_ERR_SYND, DATA_TYPE_UINT32_ARRAY, nflts, 1022 synd, NULL); 1023 1024 fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT, DATA_TYPE_UINT32_ARRAY, 1025 nflts, dslot, NULL); 1026 1027 fm_payload_set(ereport, MC_OPL_ERR_DRAM, DATA_TYPE_UINT32_ARRAY, nflts, 1028 values, NULL); 1029 1030 device_path[0] = 0; 1031 p = &device_path[0]; 1032 sid[0] = 0; 1033 s = &sid[0]; 1034 ret = 0; 1035 1036 for (i = 0; i < nflts; i++) { 1037 int bank; 1038 1039 flt_stat = mc_aflt->mflt_stat[i]; 1040 bank = flt_stat->mf_flt_maddr.ma_bank; 1041 ret = mc_set_mem_unum(p + strlen(p), blen, 1042 flt_stat->mf_flt_maddr.ma_phys_bd, bank, flt_stat->mf_type, 1043 flt_stat->mf_dimm_slot); 1044 1045 if (ret != 0) { 1046 cmn_err(CE_WARN, 1047 "mc_ereport_post: Failed to determine the unum " 1048 "for board=%d bank=%d type=0x%x slot=0x%x", 1049 flt_stat->mf_flt_maddr.ma_bd, bank, 1050 flt_stat->mf_type, flt_stat->mf_dimm_slot); 1051 continue; 1052 } 1053 n = strlen(device_path); 1054 blen = MAXPATHLEN - n; 1055 p = &device_path[n]; 1056 if (i < (nflts - 1)) { 1057 snprintf(p, blen, " "); 1058 blen--; 1059 p++; 1060 } 1061 1062 if (ret == 0) { 1063 ret = mc_set_mem_sid(mc_aflt->mflt_mcp, s + strlen(s), 1064 blen, flt_stat->mf_flt_maddr.ma_phys_bd, bank, 1065 flt_stat->mf_type, flt_stat->mf_dimm_slot); 1066 1067 } 1068 } 1069 1070 (void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL, 1071 device_path, (ret == 0) ? sid : NULL, (ret == 0) ? offset : 1072 (uint64_t)-1); 1073 1074 fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST, resource, 1075 NULL); 1076 1077 if (panicstr) { 1078 errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC); 1079 } else { 1080 (void) fm_ereport_post(ereport, EVCH_TRYHARD); 1081 fm_nvlist_destroy(ereport, FM_NVA_FREE); 1082 fm_nvlist_destroy(detector, FM_NVA_FREE); 1083 fm_nvlist_destroy(resource, FM_NVA_FREE); 1084 } 1085 } 1086 1087 1088 static void 1089 mc_err_drain(mc_aflt_t *mc_aflt) 1090 { 1091 int rv; 1092 uint64_t pa = (uint64_t)(-1); 1093 int i; 1094 1095 MC_LOG("mc_err_drain: %s\n", mc_aflt->mflt_erpt_class); 1096 /* 1097 * we come here only when we have: 1098 * In mirror mode: MUE, SUE 1099 * In normal mode: UE, Permanent CE, Intermittent CE 1100 */ 1101 for (i = 0; i < mc_aflt->mflt_nflts; i++) { 1102 rv = mcaddr_to_pa(mc_aflt->mflt_mcp, 1103 &(mc_aflt->mflt_stat[i]->mf_flt_maddr), &pa); 1104 1105 /* Ensure the pa is valid (not in isolated memory block) */ 1106 if (rv == 0 && pa_is_valid(mc_aflt->mflt_mcp, pa)) 1107 mc_aflt->mflt_stat[i]->mf_flt_paddr = pa; 1108 else 1109 mc_aflt->mflt_stat[i]->mf_flt_paddr = (uint64_t)-1; 1110 } 1111 1112 MC_LOG("mc_err_drain:pa = %lx\n", pa); 1113 1114 switch (page_retire_check(pa, NULL)) { 1115 case 0: 1116 case EAGAIN: 1117 MC_LOG("Page retired or pending\n"); 1118 return; 1119 case EIO: 1120 /* 1121 * Do page retirement except for the PCE and ICE cases. 1122 * This is taken care by the OPL DE 1123 */ 1124 if (mc_aflt->mflt_stat[0]->mf_type != 1125 FLT_TYPE_INTERMITTENT_CE && 1126 mc_aflt->mflt_stat[0]->mf_type != FLT_TYPE_PERMANENT_CE) { 1127 MC_LOG("offline page at pa %lx error %x\n", pa, 1128 mc_aflt->mflt_pr); 1129 (void) page_retire(pa, mc_aflt->mflt_pr); 1130 } 1131 break; 1132 case EINVAL: 1133 default: 1134 /* 1135 * Some memory do not have page structure so 1136 * we keep going in case of EINVAL. 1137 */ 1138 break; 1139 } 1140 1141 for (i = 0; i < mc_aflt->mflt_nflts; i++) { 1142 mc_aflt_t mc_aflt0; 1143 if (mc_aflt->mflt_stat[i]->mf_flt_paddr != (uint64_t)-1) { 1144 mc_aflt0 = *mc_aflt; 1145 mc_aflt0.mflt_nflts = 1; 1146 mc_aflt0.mflt_stat[0] = mc_aflt->mflt_stat[i]; 1147 mc_ereport_post(&mc_aflt0); 1148 } 1149 } 1150 } 1151 1152 /* 1153 * The restart address is actually defined in unit of PA[37:6] 1154 * the mac patrol will convert that to dimm offset. If the 1155 * address is not in the bank, it will continue to search for 1156 * the next PA that is within the bank. 1157 * 1158 * Also the mac patrol scans the dimms based on PA, not 1159 * dimm offset. 1160 */ 1161 static int 1162 restart_patrol(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr_info) 1163 { 1164 uint64_t pa; 1165 int rv; 1166 1167 if (MC_REWRITE_MODE(mcp, bank)) { 1168 return (0); 1169 } 1170 if (rsaddr_info == NULL || (rsaddr_info->mi_valid == 0)) { 1171 MAC_PTRL_START(mcp, bank); 1172 return (0); 1173 } 1174 1175 rv = mcaddr_to_pa(mcp, &rsaddr_info->mi_restartaddr, &pa); 1176 if (rv != 0) { 1177 MC_LOG("cannot convert mcaddr to pa. use auto restart\n"); 1178 MAC_PTRL_START(mcp, bank); 1179 return (0); 1180 } 1181 1182 if (!mc_rangecheck_pa(mcp, pa)) { 1183 /* pa is not on this board, just retry */ 1184 cmn_err(CE_WARN, "restart_patrol: invalid address %lx " 1185 "on board %d\n", pa, mcp->mc_board_num); 1186 MAC_PTRL_START(mcp, bank); 1187 return (0); 1188 } 1189 1190 MC_LOG("restart_patrol: pa = %lx\n", pa); 1191 1192 if (!rsaddr_info->mi_injectrestart) { 1193 /* 1194 * For non-error injection restart we need to 1195 * determine if the current restart pa/page is 1196 * a "good" page. A "good" page is a page that 1197 * has not been page retired. If the current 1198 * page that contains the pa is "good", we will 1199 * do a HW auto restart and let HW patrol continue 1200 * where it last stopped. Most desired scenario. 1201 * 1202 * If the current page is not "good", we will advance 1203 * to the next page to find the next "good" page and 1204 * restart the patrol from there. 1205 */ 1206 int wrapcount = 0; 1207 uint64_t origpa = pa; 1208 while (wrapcount < 2) { 1209 if (!pa_is_valid(mcp, pa)) { 1210 /* 1211 * Not in physinstall - advance to the 1212 * next memory isolation blocksize 1213 */ 1214 MC_LOG("Invalid PA\n"); 1215 pa = roundup(pa + 1, mc_isolation_bsize); 1216 } else { 1217 int rv; 1218 if ((rv = page_retire_check(pa, NULL)) != 0 && 1219 rv != EAGAIN) { 1220 /* 1221 * The page is "good" (not retired), 1222 * we will use automatic HW restart 1223 * algorithm if this is the original 1224 * current starting page. 1225 */ 1226 if (pa == origpa) { 1227 MC_LOG("Page has no error. " 1228 "Auto restart\n"); 1229 MAC_PTRL_START(mcp, bank); 1230 return (0); 1231 } else { 1232 /* 1233 * found a subsequent good page 1234 */ 1235 break; 1236 } 1237 } 1238 1239 /* 1240 * Skip to the next page 1241 */ 1242 pa = roundup(pa + 1, PAGESIZE); 1243 MC_LOG("Skipping bad page to %lx\n", pa); 1244 } 1245 1246 /* Check to see if we hit the end of the memory range */ 1247 if (pa >= (mcp->mc_start_address + mcp->mc_size)) { 1248 MC_LOG("Wrap around\n"); 1249 pa = mcp->mc_start_address; 1250 wrapcount++; 1251 } 1252 } 1253 1254 if (wrapcount > 1) { 1255 MC_LOG("Failed to find a good page. Just restart\n"); 1256 MAC_PTRL_START(mcp, bank); 1257 return (0); 1258 } 1259 } 1260 1261 /* 1262 * We reached here either: 1263 * 1. We are doing an error injection restart that specify 1264 * the exact pa/page to restart. OR 1265 * 2. We found a subsequent good page different from the 1266 * original restart pa/page. 1267 * Restart MAC patrol: PA[37:6] 1268 */ 1269 MC_LOG("restart at pa = %lx\n", pa); 1270 ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa)); 1271 MAC_PTRL_START_ADD(mcp, bank); 1272 1273 return (0); 1274 } 1275 1276 static void 1277 mc_retry_info_put(mc_retry_info_t **q, mc_retry_info_t *p) 1278 { 1279 ASSERT(p != NULL); 1280 p->ri_next = *q; 1281 *q = p; 1282 } 1283 1284 static mc_retry_info_t * 1285 mc_retry_info_get(mc_retry_info_t **q) 1286 { 1287 mc_retry_info_t *p; 1288 1289 if ((p = *q) != NULL) { 1290 *q = p->ri_next; 1291 return (p); 1292 } else { 1293 return (NULL); 1294 } 1295 } 1296 1297 /* 1298 * Rewriting is used for two purposes. 1299 * - to correct the error in memory. 1300 * - to determine whether the error is permanent or intermittent. 1301 * It's done by writing the address in MAC_BANKm_REWRITE_ADD 1302 * and issuing REW_REQ command in MAC_BANKm_PTRL_CNRL. After that, 1303 * REW_END (and REW_CE/REW_UE if some error detected) is set when 1304 * rewrite operation is done. See 4.7.3 and 4.7.11 in Columbus2 PRM. 1305 * 1306 * Note that rewrite operation doesn't change RAW_UE to Marked UE. 1307 * Therefore, we use it only CE case. 1308 */ 1309 1310 static uint32_t 1311 do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr, int retrying) 1312 { 1313 uint32_t cntl; 1314 int count = 0; 1315 int max_count; 1316 int retry_state; 1317 1318 if (retrying) 1319 max_count = 1; 1320 else 1321 max_count = mc_max_rewrite_loop; 1322 1323 retry_state = RETRY_STATE_PENDING; 1324 1325 if (!retrying && MC_REWRITE_MODE(mcp, bank)) { 1326 goto timeout; 1327 } 1328 1329 retry_state = RETRY_STATE_ACTIVE; 1330 1331 /* first wait to make sure PTRL_STATUS is 0 */ 1332 while (count++ < max_count) { 1333 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 1334 if (!(cntl & MAC_CNTL_PTRL_STATUS)) { 1335 count = 0; 1336 break; 1337 } 1338 drv_usecwait(mc_rewrite_delay); 1339 } 1340 if (count >= max_count) 1341 goto timeout; 1342 1343 count = 0; 1344 1345 ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr); 1346 MAC_REW_REQ(mcp, bank); 1347 1348 retry_state = RETRY_STATE_REWRITE; 1349 1350 do { 1351 if (count++ > max_count) { 1352 goto timeout; 1353 } else { 1354 drv_usecwait(mc_rewrite_delay); 1355 } 1356 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 1357 /* 1358 * If there are other MEMORY or PCI activities, this 1359 * will be BUSY, else it should be set immediately 1360 */ 1361 } while (!(cntl & MAC_CNTL_REW_END)); 1362 1363 MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS); 1364 return (cntl); 1365 timeout: 1366 mc_set_rewrite(mcp, bank, dimm_addr, retry_state); 1367 1368 return (0); 1369 } 1370 1371 void 1372 mc_clear_rewrite(mc_opl_t *mcp, int bank) 1373 { 1374 struct mc_bank *bankp; 1375 mc_retry_info_t *retry; 1376 uint32_t rew_addr; 1377 1378 bankp = &(mcp->mc_bank[bank]); 1379 retry = bankp->mcb_active; 1380 bankp->mcb_active = NULL; 1381 mc_retry_info_put(&bankp->mcb_retry_freelist, retry); 1382 1383 again: 1384 bankp->mcb_rewrite_count = 0; 1385 1386 while (retry = mc_retry_info_get(&bankp->mcb_retry_pending)) { 1387 rew_addr = retry->ri_addr; 1388 mc_retry_info_put(&bankp->mcb_retry_freelist, retry); 1389 if (do_rewrite(mcp, bank, rew_addr, 1) == 0) 1390 break; 1391 } 1392 1393 /* we break out if no more pending rewrite or we got timeout again */ 1394 1395 if (!bankp->mcb_active && !bankp->mcb_retry_pending) { 1396 if (!IS_MIRROR(mcp, bank)) { 1397 MC_CLEAR_REWRITE_MODE(mcp, bank); 1398 } else { 1399 int mbank = bank ^ 1; 1400 bankp = &(mcp->mc_bank[mbank]); 1401 if (!bankp->mcb_active && !bankp->mcb_retry_pending) { 1402 MC_CLEAR_REWRITE_MODE(mcp, bank); 1403 MC_CLEAR_REWRITE_MODE(mcp, mbank); 1404 } else { 1405 bank = mbank; 1406 goto again; 1407 } 1408 } 1409 } 1410 } 1411 1412 void 1413 mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state) 1414 { 1415 mc_retry_info_t *retry; 1416 struct mc_bank *bankp; 1417 1418 bankp = &mcp->mc_bank[bank]; 1419 1420 retry = mc_retry_info_get(&bankp->mcb_retry_freelist); 1421 1422 ASSERT(retry != NULL); 1423 1424 retry->ri_addr = addr; 1425 retry->ri_state = state; 1426 1427 MC_SET_REWRITE_MODE(mcp, bank); 1428 1429 if ((state > RETRY_STATE_PENDING)) { 1430 ASSERT(bankp->mcb_active == NULL); 1431 bankp->mcb_active = retry; 1432 } else { 1433 mc_retry_info_put(&bankp->mcb_retry_pending, retry); 1434 } 1435 1436 if (IS_MIRROR(mcp, bank)) { 1437 int mbank = bank ^1; 1438 MC_SET_REWRITE_MODE(mcp, mbank); 1439 } 1440 } 1441 1442 void 1443 mc_process_scf_log(mc_opl_t *mcp) 1444 { 1445 int count; 1446 int n = 0; 1447 scf_log_t *p; 1448 int bank; 1449 1450 for (bank = 0; bank < BANKNUM_PER_SB; bank++) { 1451 while ((p = mcp->mc_scf_log[bank]) != NULL && 1452 (n < mc_max_errlog_processed)) { 1453 ASSERT(bank == p->sl_bank); 1454 count = 0; 1455 while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank)) 1456 & MAC_STATIC_ERR_VLD)) { 1457 if (count++ >= (mc_max_scf_loop)) { 1458 break; 1459 } 1460 drv_usecwait(mc_scf_delay); 1461 } 1462 1463 if (count < mc_max_scf_loop) { 1464 ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank), 1465 p->sl_err_log); 1466 1467 ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank), 1468 p->sl_err_add|MAC_STATIC_ERR_VLD); 1469 mcp->mc_scf_retry[bank] = 0; 1470 } else { 1471 /* 1472 * if we try too many times, just drop the req 1473 */ 1474 if (mcp->mc_scf_retry[bank]++ <= 1475 mc_max_scf_retry) { 1476 return; 1477 } else { 1478 if ((++mc_pce_dropped & 0xff) == 0) { 1479 cmn_err(CE_WARN, "Cannot " 1480 "report Permanent CE to " 1481 "SCF\n"); 1482 } 1483 } 1484 } 1485 n++; 1486 mcp->mc_scf_log[bank] = p->sl_next; 1487 mcp->mc_scf_total[bank]--; 1488 ASSERT(mcp->mc_scf_total[bank] >= 0); 1489 kmem_free(p, sizeof (scf_log_t)); 1490 } 1491 } 1492 } 1493 void 1494 mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank) 1495 { 1496 scf_log_t *p; 1497 1498 if (mcp->mc_scf_total[bank] >= mc_max_scf_logs) { 1499 if ((++mc_pce_dropped & 0xff) == 0) { 1500 cmn_err(CE_WARN, "Too many Permanent CE requests.\n"); 1501 } 1502 return; 1503 } 1504 p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP); 1505 p->sl_next = 0; 1506 p->sl_err_add = flt_stat->mf_err_add; 1507 p->sl_err_log = flt_stat->mf_err_log; 1508 p->sl_bank = bank; 1509 1510 if (mcp->mc_scf_log[bank] == NULL) { 1511 /* 1512 * we rely on mc_scf_log to detect NULL queue. 1513 * mc_scf_log_tail is irrelevant is such case. 1514 */ 1515 mcp->mc_scf_log_tail[bank] = mcp->mc_scf_log[bank] = p; 1516 } else { 1517 mcp->mc_scf_log_tail[bank]->sl_next = p; 1518 mcp->mc_scf_log_tail[bank] = p; 1519 } 1520 mcp->mc_scf_total[bank]++; 1521 } 1522 /* 1523 * This routine determines what kind of CE happens, intermittent 1524 * or permanent as follows. (See 4.7.3 in Columbus2 PRM.) 1525 * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register. 1526 * - If CE is still detected on the same address even after doing 1527 * rewrite operation twice, it is determined as permanent error. 1528 * - If error is not detected anymore, it is determined as intermittent 1529 * error. 1530 * - If UE is detected due to rewrite operation, it should be treated 1531 * as UE. 1532 */ 1533 1534 /* ARGSUSED */ 1535 static void 1536 mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error) 1537 { 1538 uint32_t cntl; 1539 int i; 1540 1541 flt_stat->mf_type = FLT_TYPE_PERMANENT_CE; 1542 /* 1543 * rewrite request 1st time reads and correct error data 1544 * and write to DIMM. 2nd rewrite request must be issued 1545 * after REW_CE/UE/END is 0. When the 2nd request is completed, 1546 * if REW_CE = 1, then it is permanent CE. 1547 */ 1548 for (i = 0; i < 2; i++) { 1549 cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add, 0); 1550 1551 if (cntl == 0) { 1552 /* timeout case */ 1553 return; 1554 } 1555 /* 1556 * If the error becomes UE or CMPE 1557 * we return to the caller immediately. 1558 */ 1559 if (cntl & MAC_CNTL_REW_UE) { 1560 if (ptrl_error) 1561 flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE; 1562 else 1563 flt_stat->mf_cntl |= MAC_CNTL_MI_UE; 1564 flt_stat->mf_type = FLT_TYPE_UE; 1565 return; 1566 } 1567 if (cntl & MAC_CNTL_REW_CMPE) { 1568 if (ptrl_error) 1569 flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE; 1570 else 1571 flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE; 1572 flt_stat->mf_type = FLT_TYPE_CMPE; 1573 return; 1574 } 1575 } 1576 if (!(cntl & MAC_CNTL_REW_CE)) { 1577 flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE; 1578 } 1579 1580 if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 1581 /* report PERMANENT_CE to SP via SCF */ 1582 if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) { 1583 mc_queue_scf_log(mcp, flt_stat, bank); 1584 } 1585 } 1586 } 1587 1588 #define IS_CMPE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE :\ 1589 MAC_CNTL_MI_CMPE)) 1590 #define IS_UE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE)) 1591 #define IS_CE(cntl, f) ((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE)) 1592 #define IS_OK(cntl, f) (!((cntl) & ((f) ? MAC_CNTL_PTRL_ERRS : \ 1593 MAC_CNTL_MI_ERRS))) 1594 1595 1596 static int 1597 IS_CE_ONLY(uint32_t cntl, int ptrl_error) 1598 { 1599 if (ptrl_error) { 1600 return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE); 1601 } else { 1602 return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE); 1603 } 1604 } 1605 1606 void 1607 mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value) 1608 { 1609 int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank; 1610 1611 if (mcp->mc_speedup_period[ebank] > 0) 1612 value |= mc_max_speed; 1613 else 1614 value |= mcp->mc_speed; 1615 ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value); 1616 } 1617 1618 static void 1619 mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat) 1620 { 1621 flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1622 MAC_CNTL_PTRL_ERRS; 1623 flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank)); 1624 flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank)); 1625 flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num; 1626 flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num; 1627 flt_stat->mf_flt_maddr.ma_bank = bank; 1628 flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add; 1629 } 1630 1631 static void 1632 mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat) 1633 { 1634 uint32_t status, old_status; 1635 1636 status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & MAC_CNTL_MI_ERRS; 1637 old_status = 0; 1638 1639 /* we keep reading until the status is stable */ 1640 while (old_status != status) { 1641 old_status = status; 1642 flt_stat->mf_err_add = LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank)); 1643 flt_stat->mf_err_log = LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank)); 1644 status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & 1645 MAC_CNTL_MI_ERRS; 1646 if (status == old_status) { 1647 break; 1648 } 1649 } 1650 1651 flt_stat->mf_cntl = status; 1652 flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num; 1653 flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num; 1654 flt_stat->mf_flt_maddr.ma_bank = bank; 1655 flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add; 1656 } 1657 1658 1659 /* 1660 * Error philosophy for mirror mode: 1661 * 1662 * PTRL (The error address for both banks are same, since ptrl stops if it 1663 * detects error.) 1664 * - Compare error log CMPE. 1665 * 1666 * - UE-UE Report MUE. No rewrite. 1667 * 1668 * - UE-* UE-(CE/OK). Rewrite to scrub UE. Report SUE. 1669 * 1670 * - CE-* CE-(CE/OK). Scrub to determine if CE is permanent. 1671 * If CE is permanent, inform SCF. Once for each 1672 * Dimm. If CE becomes UE or CMPE, go back to above. 1673 * 1674 * 1675 * MI (The error addresses for each bank are the same or different.) 1676 * - Compare error If addresses are the same. Just CMPE, so log CMPE. 1677 * If addresses are different (this could happen 1678 * as a result of scrubbing. Report each separately. 1679 * Only report error info on each side. 1680 * 1681 * - UE-UE Addresses are the same. Report MUE. 1682 * Addresses are different. Report SUE on each bank. 1683 * Rewrite to clear UE. 1684 * 1685 * - UE-* UE-(CE/OK) 1686 * Rewrite to clear UE. Report SUE for the bank. 1687 * 1688 * - CE-* CE-(CE/OK). Scrub to determine if CE is permanent. 1689 * If CE becomes UE or CMPE, go back to above. 1690 * 1691 */ 1692 1693 static int 1694 mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat) 1695 { 1696 int ptrl_error = mc_aflt->mflt_is_ptrl; 1697 int i; 1698 int rv = 0; 1699 int bank; 1700 int rewrite_timeout = 0; 1701 1702 MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n", 1703 flt_stat[0].mf_cntl, flt_stat[1].mf_cntl); 1704 1705 if (ptrl_error) { 1706 if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) & 1707 MAC_CNTL_PTRL_ERRS) == 0) 1708 return (0); 1709 } else { 1710 if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) & 1711 MAC_CNTL_MI_ERRS) == 0) 1712 return (0); 1713 } 1714 1715 /* 1716 * First we take care of the case of CE 1717 * because they can become UE or CMPE 1718 */ 1719 for (i = 0; i < 2; i++) { 1720 if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) { 1721 bank = flt_stat[i].mf_flt_maddr.ma_bank; 1722 MC_LOG("CE detected on bank %d\n", bank); 1723 mc_scrub_ce(mcp, bank, &flt_stat[i], ptrl_error); 1724 if (MC_REWRITE_ACTIVE(mcp, bank)) { 1725 rewrite_timeout = 1; 1726 } 1727 rv = 1; 1728 } 1729 } 1730 1731 if (rewrite_timeout) 1732 return (0); 1733 1734 /* The above scrubbing can turn CE into UE or CMPE */ 1735 1736 /* 1737 * Now we distinguish two cases: same address or not 1738 * the same address. It might seem more intuitive to 1739 * distinguish PTRL v.s. MI error but it is more 1740 * complicated that way. 1741 */ 1742 1743 if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) { 1744 1745 if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) || 1746 IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) { 1747 flt_stat[0].mf_type = FLT_TYPE_CMPE; 1748 flt_stat[1].mf_type = FLT_TYPE_CMPE; 1749 mc_aflt->mflt_erpt_class = MC_OPL_CMPE; 1750 mc_aflt->mflt_nflts = 2; 1751 mc_aflt->mflt_stat[0] = &flt_stat[0]; 1752 mc_aflt->mflt_stat[1] = &flt_stat[1]; 1753 mc_aflt->mflt_pr = PR_UE; 1754 /* 1755 * Compare error is result of MAC internal error, so 1756 * simply log it instead of publishing an ereport. SCF 1757 * diagnoses all the MAC internal and its i/f error. 1758 */ 1759 MC_LOG("cmpe error detected\n"); 1760 return (1); 1761 } 1762 1763 if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) && 1764 IS_UE(flt_stat[1].mf_cntl, ptrl_error)) { 1765 /* Both side are UE's */ 1766 1767 MAC_SET_ERRLOG_INFO(&flt_stat[0]); 1768 MAC_SET_ERRLOG_INFO(&flt_stat[1]); 1769 MC_LOG("MUE detected\n"); 1770 flt_stat[0].mf_type = FLT_TYPE_MUE; 1771 flt_stat[1].mf_type = FLT_TYPE_MUE; 1772 mc_aflt->mflt_erpt_class = MC_OPL_MUE; 1773 mc_aflt->mflt_nflts = 2; 1774 mc_aflt->mflt_stat[0] = &flt_stat[0]; 1775 mc_aflt->mflt_stat[1] = &flt_stat[1]; 1776 mc_aflt->mflt_pr = PR_UE; 1777 mc_err_drain(mc_aflt); 1778 return (1); 1779 } 1780 1781 /* Now the only case is UE/CE, UE/OK, or don't care */ 1782 for (i = 0; i < 2; i++) { 1783 if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) { 1784 1785 /* rewrite can clear the one side UE error */ 1786 1787 if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) { 1788 (void) do_rewrite(mcp, 1789 flt_stat[i].mf_flt_maddr.ma_bank, 1790 flt_stat[i].mf_flt_maddr.ma_dimm_addr, 0); 1791 } 1792 flt_stat[i].mf_type = FLT_TYPE_UE; 1793 MAC_SET_ERRLOG_INFO(&flt_stat[i]); 1794 mc_aflt->mflt_erpt_class = MC_OPL_SUE; 1795 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1796 mc_aflt->mflt_nflts = 1; 1797 mc_aflt->mflt_pr = PR_MCE; 1798 mc_err_drain(mc_aflt); 1799 /* Once we hit a UE/CE or UE/OK case, done */ 1800 return (1); 1801 } 1802 } 1803 1804 } else { 1805 /* 1806 * addresses are different. That means errors 1807 * on the 2 banks are not related at all. 1808 */ 1809 for (i = 0; i < 2; i++) { 1810 if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) { 1811 flt_stat[i].mf_type = FLT_TYPE_CMPE; 1812 mc_aflt->mflt_erpt_class = MC_OPL_CMPE; 1813 mc_aflt->mflt_nflts = 1; 1814 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1815 mc_aflt->mflt_pr = PR_UE; 1816 /* 1817 * Compare error is result of MAC internal 1818 * error, so simply log it instead of 1819 * publishing an ereport. SCF diagnoses all 1820 * the MAC internal and its interface error. 1821 */ 1822 MC_LOG("cmpe error detected\n"); 1823 /* no more report on this bank */ 1824 flt_stat[i].mf_cntl = 0; 1825 rv = 1; 1826 } 1827 } 1828 1829 /* rewrite can clear the one side UE error */ 1830 1831 for (i = 0; i < 2; i++) { 1832 if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) { 1833 (void) do_rewrite(mcp, 1834 flt_stat[i].mf_flt_maddr.ma_bank, 1835 flt_stat[i].mf_flt_maddr.ma_dimm_addr, 1836 0); 1837 flt_stat[i].mf_type = FLT_TYPE_UE; 1838 MAC_SET_ERRLOG_INFO(&flt_stat[i]); 1839 mc_aflt->mflt_erpt_class = MC_OPL_SUE; 1840 mc_aflt->mflt_stat[0] = &flt_stat[i]; 1841 mc_aflt->mflt_nflts = 1; 1842 mc_aflt->mflt_pr = PR_MCE; 1843 mc_err_drain(mc_aflt); 1844 rv = 1; 1845 } 1846 } 1847 } 1848 return (rv); 1849 } 1850 static void 1851 mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr) 1852 { 1853 mc_aflt_t mc_aflt; 1854 mc_flt_stat_t flt_stat[2], mi_flt_stat[2]; 1855 int i; 1856 int mi_valid; 1857 1858 ASSERT(rsaddr); 1859 1860 bzero(&mc_aflt, sizeof (mc_aflt_t)); 1861 bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t)); 1862 bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t)); 1863 1864 1865 mc_aflt.mflt_mcp = mcp; 1866 mc_aflt.mflt_id = gethrtime(); 1867 1868 /* Now read all the registers into flt_stat */ 1869 1870 for (i = 0; i < 2; i++) { 1871 MC_LOG("Reading registers of bank %d\n", bank); 1872 /* patrol registers */ 1873 mc_read_ptrl_reg(mcp, bank, &flt_stat[i]); 1874 1875 /* 1876 * In mirror mode, it is possible that only one bank 1877 * may report the error. We need to check for it to 1878 * ensure we pick the right addr value for patrol restart. 1879 * Note that if both banks reported errors, we pick the 1880 * 2nd one. Both banks should reported the same error address. 1881 */ 1882 if (flt_stat[i].mf_cntl & MAC_CNTL_PTRL_ERRS) 1883 rsaddr->mi_restartaddr = flt_stat[i].mf_flt_maddr; 1884 1885 MC_LOG("ptrl registers cntl %x add %x log %x\n", 1886 flt_stat[i].mf_cntl, flt_stat[i].mf_err_add, 1887 flt_stat[i].mf_err_log); 1888 1889 /* MI registers */ 1890 mc_read_mi_reg(mcp, bank, &mi_flt_stat[i]); 1891 1892 MC_LOG("MI registers cntl %x add %x log %x\n", 1893 mi_flt_stat[i].mf_cntl, mi_flt_stat[i].mf_err_add, 1894 mi_flt_stat[i].mf_err_log); 1895 1896 bank = bank^1; 1897 } 1898 1899 /* clear errors once we read all the registers */ 1900 MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1901 1902 MAC_CLEAR_ERRS(mcp, bank ^ 1, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 1903 1904 /* Process MI errors first */ 1905 1906 /* if not error mode, cntl1 is 0 */ 1907 if ((mi_flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) || 1908 (mi_flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID)) 1909 mi_flt_stat[0].mf_cntl = 0; 1910 1911 if ((mi_flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) || 1912 (mi_flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID)) 1913 mi_flt_stat[1].mf_cntl = 0; 1914 1915 mc_aflt.mflt_is_ptrl = 0; 1916 mi_valid = mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]); 1917 1918 if ((((flt_stat[0].mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1919 MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[0].mf_cntl & 1920 MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) && 1921 (flt_stat[0].mf_err_add == mi_flt_stat[0].mf_err_add) && 1922 (((flt_stat[1].mf_cntl & MAC_CNTL_PTRL_ERRS) >> 1923 MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[1].mf_cntl & 1924 MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) && 1925 (flt_stat[1].mf_err_add == mi_flt_stat[1].mf_err_add)) { 1926 #ifdef DEBUG 1927 MC_LOG("discarding PTRL error because " 1928 "it is the same as MI\n"); 1929 #endif 1930 rsaddr->mi_valid = mi_valid; 1931 return; 1932 } 1933 /* if not error mode, cntl1 is 0 */ 1934 if ((flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) || 1935 (flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID)) 1936 flt_stat[0].mf_cntl = 0; 1937 1938 if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) || 1939 (flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID)) 1940 flt_stat[1].mf_cntl = 0; 1941 1942 mc_aflt.mflt_is_ptrl = 1; 1943 rsaddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]); 1944 } 1945 static int 1946 mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt, 1947 mc_flt_stat_t *flt_stat) 1948 { 1949 int ptrl_error = mc_aflt->mflt_is_ptrl; 1950 int rv = 0; 1951 1952 mc_aflt->mflt_erpt_class = NULL; 1953 if (IS_UE(flt_stat->mf_cntl, ptrl_error)) { 1954 MC_LOG("UE detected\n"); 1955 flt_stat->mf_type = FLT_TYPE_UE; 1956 mc_aflt->mflt_erpt_class = MC_OPL_UE; 1957 mc_aflt->mflt_pr = PR_UE; 1958 MAC_SET_ERRLOG_INFO(flt_stat); 1959 rv = 1; 1960 } else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) { 1961 MC_LOG("CE detected\n"); 1962 MAC_SET_ERRLOG_INFO(flt_stat); 1963 1964 /* Error type can change after scrubbing */ 1965 mc_scrub_ce(mcp, bank, flt_stat, ptrl_error); 1966 if (MC_REWRITE_ACTIVE(mcp, bank)) { 1967 return (0); 1968 } 1969 1970 if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE) { 1971 mc_aflt->mflt_erpt_class = MC_OPL_ICE; 1972 mc_aflt->mflt_pr = PR_MCE; 1973 } else if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) { 1974 mc_aflt->mflt_erpt_class = MC_OPL_CE; 1975 mc_aflt->mflt_pr = PR_MCE; 1976 } else if (flt_stat->mf_type == FLT_TYPE_UE) { 1977 mc_aflt->mflt_erpt_class = MC_OPL_UE; 1978 mc_aflt->mflt_pr = PR_UE; 1979 } 1980 rv = 1; 1981 } 1982 MC_LOG("mc_process_error: fault type %x erpt %s\n", flt_stat->mf_type, 1983 mc_aflt->mflt_erpt_class); 1984 if (mc_aflt->mflt_erpt_class) { 1985 mc_aflt->mflt_stat[0] = flt_stat; 1986 mc_aflt->mflt_nflts = 1; 1987 mc_err_drain(mc_aflt); 1988 } 1989 return (rv); 1990 } 1991 1992 static void 1993 mc_error_handler(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr) 1994 { 1995 mc_aflt_t mc_aflt; 1996 mc_flt_stat_t flt_stat, mi_flt_stat; 1997 int mi_valid; 1998 1999 bzero(&mc_aflt, sizeof (mc_aflt_t)); 2000 bzero(&flt_stat, sizeof (mc_flt_stat_t)); 2001 bzero(&mi_flt_stat, sizeof (mc_flt_stat_t)); 2002 2003 mc_aflt.mflt_mcp = mcp; 2004 mc_aflt.mflt_id = gethrtime(); 2005 2006 /* patrol registers */ 2007 mc_read_ptrl_reg(mcp, bank, &flt_stat); 2008 2009 ASSERT(rsaddr); 2010 rsaddr->mi_restartaddr = flt_stat.mf_flt_maddr; 2011 2012 MC_LOG("ptrl registers cntl %x add %x log %x\n", flt_stat.mf_cntl, 2013 flt_stat.mf_err_add, flt_stat.mf_err_log); 2014 2015 /* MI registers */ 2016 mc_read_mi_reg(mcp, bank, &mi_flt_stat); 2017 2018 2019 MC_LOG("MI registers cntl %x add %x log %x\n", mi_flt_stat.mf_cntl, 2020 mi_flt_stat.mf_err_add, mi_flt_stat.mf_err_log); 2021 2022 /* clear errors once we read all the registers */ 2023 MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS)); 2024 2025 mc_aflt.mflt_is_ptrl = 0; 2026 if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) && 2027 ((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) && 2028 ((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) { 2029 mi_valid = mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat); 2030 } 2031 2032 if ((((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) >> 2033 MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat.mf_cntl & 2034 MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) && 2035 (flt_stat.mf_err_add == mi_flt_stat.mf_err_add)) { 2036 #ifdef DEBUG 2037 MC_LOG("discarding PTRL error because " 2038 "it is the same as MI\n"); 2039 #endif 2040 rsaddr->mi_valid = mi_valid; 2041 return; 2042 } 2043 2044 mc_aflt.mflt_is_ptrl = 1; 2045 if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) && 2046 ((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) && 2047 ((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) { 2048 rsaddr->mi_valid = mc_process_error(mcp, bank, &mc_aflt, 2049 &flt_stat); 2050 } 2051 } 2052 /* 2053 * memory patrol error handling algorithm: 2054 * timeout() is used to do periodic polling 2055 * This is the flow chart. 2056 * timeout -> 2057 * mc_check_errors() 2058 * if memory bank is installed, read the status register 2059 * if any error bit is set, 2060 * -> mc_error_handler() 2061 * -> read all error registers 2062 * -> mc_process_error() 2063 * determine error type 2064 * rewrite to clear error or scrub to determine CE type 2065 * inform SCF on permanent CE 2066 * -> mc_err_drain 2067 * page offline processing 2068 * -> mc_ereport_post() 2069 */ 2070 2071 static void 2072 mc_process_rewrite(mc_opl_t *mcp, int bank) 2073 { 2074 uint32_t rew_addr, cntl; 2075 mc_retry_info_t *retry; 2076 struct mc_bank *bankp; 2077 2078 bankp = &(mcp->mc_bank[bank]); 2079 retry = bankp->mcb_active; 2080 if (retry == NULL) 2081 return; 2082 2083 if (retry->ri_state <= RETRY_STATE_ACTIVE) { 2084 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 2085 if (cntl & MAC_CNTL_PTRL_STATUS) 2086 return; 2087 rew_addr = retry->ri_addr; 2088 ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), rew_addr); 2089 MAC_REW_REQ(mcp, bank); 2090 2091 retry->ri_state = RETRY_STATE_REWRITE; 2092 } 2093 2094 cntl = ldphysio(MAC_PTRL_CNTL(mcp, bank)); 2095 2096 if (cntl & MAC_CNTL_REW_END) { 2097 MAC_CLEAR_ERRS(mcp, bank, 2098 MAC_CNTL_REW_ERRS); 2099 mc_clear_rewrite(mcp, bank); 2100 } else { 2101 /* 2102 * If the rewrite does not complete in 2103 * 1 hour, we have to consider this a HW 2104 * failure. However, there is no recovery 2105 * mechanism. The only thing we can do 2106 * to to print a warning message to the 2107 * console. We continue to increment the 2108 * counter but we only print the message 2109 * once. It will take the counter a long 2110 * time to wrap around and the user might 2111 * see a second message. In practice, 2112 * we have never hit this condition but 2113 * we have to keep the code here just in case. 2114 */ 2115 if (++mcp->mc_bank[bank].mcb_rewrite_count 2116 == mc_max_rewrite_retry) { 2117 cmn_err(CE_WARN, "Memory patrol feature is" 2118 " partly suspended on /LSB%d/B%d" 2119 " due to heavy memory load," 2120 " and it will restart" 2121 " automatically.\n", mcp->mc_board_num, 2122 bank); 2123 } 2124 } 2125 } 2126 2127 static void 2128 mc_check_errors_func(mc_opl_t *mcp) 2129 { 2130 mc_rsaddr_info_t rsaddr_info; 2131 int i, error_count = 0; 2132 uint32_t stat, cntl; 2133 int running; 2134 int wrapped; 2135 int ebk; 2136 2137 /* 2138 * scan errors. 2139 */ 2140 if (mcp->mc_status & MC_MEMORYLESS) 2141 return; 2142 2143 for (i = 0; i < BANKNUM_PER_SB; i++) { 2144 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2145 if (MC_REWRITE_ACTIVE(mcp, i)) { 2146 mc_process_rewrite(mcp, i); 2147 } 2148 stat = ldphysio(MAC_PTRL_STAT(mcp, i)); 2149 cntl = ldphysio(MAC_PTRL_CNTL(mcp, i)); 2150 running = cntl & MAC_CNTL_PTRL_START; 2151 wrapped = cntl & MAC_CNTL_PTRL_ADD_MAX; 2152 2153 /* Compute the effective bank idx */ 2154 ebk = (IS_MIRROR(mcp, i)) ? MIRROR_IDX(i) : i; 2155 2156 if (mc_debug_show_all || stat) { 2157 MC_LOG("/LSB%d/B%d stat %x cntl %x\n", 2158 mcp->mc_board_num, i, stat, cntl); 2159 } 2160 2161 /* 2162 * Update stats and reset flag if the HW patrol 2163 * wrapped around in its scan. 2164 */ 2165 if (wrapped) { 2166 MAC_CLEAR_MAX(mcp, i); 2167 mcp->mc_period[ebk]++; 2168 if (IS_MIRROR(mcp, i)) { 2169 MC_LOG("mirror mc period %ld on " 2170 "/LSB%d/B%d\n", mcp->mc_period[ebk], 2171 mcp->mc_board_num, i); 2172 } else { 2173 MC_LOG("mc period %ld on " 2174 "/LSB%d/B%d\n", mcp->mc_period[ebk], 2175 mcp->mc_board_num, i); 2176 } 2177 } 2178 2179 if (running) { 2180 /* 2181 * Mac patrol HW is still running. 2182 * Normally when an error is detected, 2183 * the HW patrol will stop so that we 2184 * can collect error data for reporting. 2185 * Certain errors (MI errors) detected may not 2186 * cause the HW patrol to stop which is a 2187 * problem since we cannot read error data while 2188 * the HW patrol is running. SW is not allowed 2189 * to stop the HW patrol while it is running 2190 * as it may cause HW inconsistency. This is 2191 * described in a HW errata. 2192 * In situations where we detected errors 2193 * that may not cause the HW patrol to stop. 2194 * We speed up the HW patrol scanning in 2195 * the hope that it will find the 'real' PTRL 2196 * errors associated with the previous errors 2197 * causing the HW to finally stop so that we 2198 * can do the reporting. 2199 */ 2200 /* 2201 * Check to see if we did speed up 2202 * the HW patrol due to previous errors 2203 * detected that did not cause the patrol 2204 * to stop. We only do it if HW patrol scan 2205 * wrapped (counted as completing a 'period'). 2206 */ 2207 if (mcp->mc_speedup_period[ebk] > 0) { 2208 if (wrapped && 2209 (--mcp->mc_speedup_period[ebk] == 2210 0)) { 2211 /* 2212 * We did try to speed up. 2213 * The speed up period has 2214 * expired and the HW patrol 2215 * is still running. The 2216 * errors must be intermittent. 2217 * We have no choice but to 2218 * ignore them, reset the scan 2219 * speed to normal and clear 2220 * the MI error bits. For 2221 * mirror mode, we need to 2222 * clear errors on both banks. 2223 */ 2224 MC_LOG("Clearing MI errors\n"); 2225 MAC_CLEAR_ERRS(mcp, i, 2226 MAC_CNTL_MI_ERRS); 2227 2228 if (IS_MIRROR(mcp, i)) { 2229 MC_LOG("Clearing " 2230 "Mirror MI errs\n"); 2231 MAC_CLEAR_ERRS(mcp, 2232 i^1, 2233 MAC_CNTL_MI_ERRS); 2234 } 2235 } 2236 } else if (stat & MAC_STAT_MI_ERRS) { 2237 /* 2238 * MI errors detected but we cannot 2239 * report them since the HW patrol 2240 * is still running. 2241 * We will attempt to speed up the 2242 * scanning and hopefully the HW 2243 * can detect PRTL errors at the same 2244 * location that cause the HW patrol 2245 * to stop. 2246 */ 2247 mcp->mc_speedup_period[ebk] = 2; 2248 MAC_CMD(mcp, i, 0); 2249 } 2250 } else if (stat & (MAC_STAT_PTRL_ERRS | 2251 MAC_STAT_MI_ERRS)) { 2252 /* 2253 * HW Patrol has stopped and we found errors. 2254 * Proceed to collect and report error info. 2255 */ 2256 mcp->mc_speedup_period[ebk] = 0; 2257 rsaddr_info.mi_valid = 0; 2258 rsaddr_info.mi_injectrestart = 0; 2259 if (IS_MIRROR(mcp, i)) { 2260 mc_error_handler_mir(mcp, i, 2261 &rsaddr_info); 2262 } else { 2263 mc_error_handler(mcp, i, &rsaddr_info); 2264 } 2265 2266 error_count++; 2267 restart_patrol(mcp, i, &rsaddr_info); 2268 } else { 2269 /* 2270 * HW patrol scan has apparently stopped 2271 * but no errors detected/flagged. 2272 * Restart the HW patrol just to be sure. 2273 * In mirror mode, the odd bank might have 2274 * reported errors that caused the patrol to 2275 * stop. We'll defer the restart to the odd 2276 * bank in this case. 2277 */ 2278 if (!IS_MIRROR(mcp, i) || (i & 0x1)) 2279 restart_patrol(mcp, i, NULL); 2280 } 2281 } 2282 } 2283 if (error_count > 0) 2284 mcp->mc_last_error += error_count; 2285 else 2286 mcp->mc_last_error = 0; 2287 } 2288 2289 /* 2290 * mc_polling -- Check errors for only one instance, 2291 * but process errors for all instances to make sure we drain the errors 2292 * faster than they can be accumulated. 2293 * 2294 * Polling on each board should be done only once per each 2295 * mc_patrol_interval_sec. This is equivalent to setting mc_tick_left 2296 * to OPL_MAX_BOARDS and decrement by 1 on each timeout. 2297 * Once mc_tick_left becomes negative, the board becomes a candidate 2298 * for polling because it has waited for at least 2299 * mc_patrol_interval_sec's long. If mc_timeout_period is calculated 2300 * differently, this has to be updated accordingly. 2301 */ 2302 2303 static void 2304 mc_polling(void) 2305 { 2306 int i, scan_error; 2307 mc_opl_t *mcp; 2308 2309 2310 scan_error = 1; 2311 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2312 mutex_enter(&mcmutex); 2313 if ((mcp = mc_instances[i]) == NULL) { 2314 mutex_exit(&mcmutex); 2315 continue; 2316 } 2317 mutex_enter(&mcp->mc_lock); 2318 mutex_exit(&mcmutex); 2319 if (!(mcp->mc_status & MC_POLL_RUNNING)) { 2320 mutex_exit(&mcp->mc_lock); 2321 continue; 2322 } 2323 if (scan_error && mcp->mc_tick_left <= 0) { 2324 mc_check_errors_func((void *)mcp); 2325 mcp->mc_tick_left = OPL_MAX_BOARDS; 2326 scan_error = 0; 2327 } else { 2328 mcp->mc_tick_left--; 2329 } 2330 mc_process_scf_log(mcp); 2331 mutex_exit(&mcp->mc_lock); 2332 } 2333 } 2334 2335 static void 2336 get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr) 2337 { 2338 maddr->ma_bd = mcp->mc_board_num; 2339 maddr->ma_bank = bank; 2340 maddr->ma_dimm_addr = 0; 2341 } 2342 2343 typedef struct mc_mem_range { 2344 uint64_t addr; 2345 uint64_t size; 2346 } mc_mem_range_t; 2347 2348 static int 2349 get_base_address(mc_opl_t *mcp) 2350 { 2351 mc_mem_range_t *mem_range; 2352 int len; 2353 2354 if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2355 "sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) { 2356 return (DDI_FAILURE); 2357 } 2358 2359 mcp->mc_start_address = mem_range->addr; 2360 mcp->mc_size = mem_range->size; 2361 2362 kmem_free(mem_range, len); 2363 return (DDI_SUCCESS); 2364 } 2365 2366 struct mc_addr_spec { 2367 uint32_t bank; 2368 uint32_t phys_hi; 2369 uint32_t phys_lo; 2370 }; 2371 2372 #define REGS_PA(m, i) ((((uint64_t)m[i].phys_hi)<<32) | m[i].phys_lo) 2373 2374 static char *mc_tbl_name[] = { 2375 "cs0-mc-pa-trans-table", 2376 "cs1-mc-pa-trans-table" 2377 }; 2378 2379 /* 2380 * This routine performs a rangecheck for a given PA 2381 * to see if it belongs to the memory range for this board. 2382 * Return 1 if it is valid (within the range) and 0 otherwise 2383 */ 2384 static int 2385 mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa) 2386 { 2387 if ((pa < mcp->mc_start_address) || (mcp->mc_start_address + 2388 mcp->mc_size <= pa)) 2389 return (0); 2390 else 2391 return (1); 2392 } 2393 2394 static void 2395 mc_memlist_delete(struct memlist *mlist) 2396 { 2397 struct memlist *ml; 2398 2399 for (ml = mlist; ml; ml = mlist) { 2400 mlist = ml->next; 2401 kmem_free(ml, sizeof (struct memlist)); 2402 } 2403 } 2404 2405 static struct memlist * 2406 mc_memlist_dup(struct memlist *mlist) 2407 { 2408 struct memlist *hl = NULL, *tl, **mlp; 2409 2410 if (mlist == NULL) 2411 return (NULL); 2412 2413 mlp = &hl; 2414 tl = *mlp; 2415 for (; mlist; mlist = mlist->next) { 2416 *mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP); 2417 (*mlp)->address = mlist->address; 2418 (*mlp)->size = mlist->size; 2419 (*mlp)->prev = tl; 2420 tl = *mlp; 2421 mlp = &((*mlp)->next); 2422 } 2423 *mlp = NULL; 2424 2425 return (hl); 2426 } 2427 2428 2429 static struct memlist * 2430 mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len) 2431 { 2432 uint64_t end; 2433 struct memlist *ml, *tl, *nlp; 2434 2435 if (mlist == NULL) 2436 return (NULL); 2437 2438 end = base + len; 2439 if ((end <= mlist->address) || (base == end)) 2440 return (mlist); 2441 2442 for (tl = ml = mlist; ml; tl = ml, ml = nlp) { 2443 uint64_t mend; 2444 2445 nlp = ml->next; 2446 2447 if (end <= ml->address) 2448 break; 2449 2450 mend = ml->address + ml->size; 2451 if (base < mend) { 2452 if (base <= ml->address) { 2453 ml->address = end; 2454 if (end >= mend) 2455 ml->size = 0ull; 2456 else 2457 ml->size = mend - ml->address; 2458 } else { 2459 ml->size = base - ml->address; 2460 if (end < mend) { 2461 struct memlist *nl; 2462 /* 2463 * splitting an memlist entry. 2464 */ 2465 nl = kmem_alloc(sizeof (struct memlist), 2466 KM_SLEEP); 2467 nl->address = end; 2468 nl->size = mend - nl->address; 2469 if ((nl->next = nlp) != NULL) 2470 nlp->prev = nl; 2471 nl->prev = ml; 2472 ml->next = nl; 2473 nlp = nl; 2474 } 2475 } 2476 if (ml->size == 0ull) { 2477 if (ml == mlist) { 2478 if ((mlist = nlp) != NULL) 2479 nlp->prev = NULL; 2480 kmem_free(ml, sizeof (struct memlist)); 2481 if (mlist == NULL) 2482 break; 2483 ml = nlp; 2484 } else { 2485 if ((tl->next = nlp) != NULL) 2486 nlp->prev = tl; 2487 kmem_free(ml, sizeof (struct memlist)); 2488 ml = tl; 2489 } 2490 } 2491 } 2492 } 2493 2494 return (mlist); 2495 } 2496 2497 static void 2498 mc_get_mlist(mc_opl_t *mcp) 2499 { 2500 struct memlist *mlist; 2501 2502 memlist_read_lock(); 2503 mlist = mc_memlist_dup(phys_install); 2504 memlist_read_unlock(); 2505 2506 if (mlist) { 2507 mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address); 2508 } 2509 2510 if (mlist) { 2511 uint64_t startpa, endpa; 2512 2513 startpa = mcp->mc_start_address + mcp->mc_size; 2514 endpa = ptob(physmax + 1); 2515 if (endpa > startpa) { 2516 mlist = mc_memlist_del_span(mlist, startpa, 2517 endpa - startpa); 2518 } 2519 } 2520 2521 if (mlist) { 2522 mcp->mlist = mlist; 2523 } 2524 } 2525 2526 int 2527 mc_board_add(mc_opl_t *mcp) 2528 { 2529 struct mc_addr_spec *macaddr; 2530 cs_status_t *cs_status; 2531 int len, len1, i, bk, cc; 2532 mc_rsaddr_info_t rsaddr; 2533 uint32_t mirr; 2534 int nbanks = 0; 2535 uint64_t nbytes = 0; 2536 int mirror_mode = 0; 2537 int ret; 2538 2539 /* 2540 * Get configurations from "pseudo-mc" node which includes: 2541 * board# : LSB number 2542 * mac-addr : physical base address of MAC registers 2543 * csX-mac-pa-trans-table: translation table from DIMM address 2544 * to physical address or vice versa. 2545 */ 2546 mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip, 2547 DDI_PROP_DONTPASS, "board#", -1); 2548 2549 if (mcp->mc_board_num == -1) { 2550 return (DDI_FAILURE); 2551 } 2552 2553 /* 2554 * Get start address in this CAB. It can be gotten from 2555 * "sb-mem-ranges" property. 2556 */ 2557 2558 if (get_base_address(mcp) == DDI_FAILURE) { 2559 return (DDI_FAILURE); 2560 } 2561 /* get mac-pa trans tables */ 2562 for (i = 0; i < MC_TT_CS; i++) { 2563 len = MC_TT_ENTRIES; 2564 cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip, 2565 DDI_PROP_DONTPASS, mc_tbl_name[i], 2566 (caddr_t)mcp->mc_trans_table[i], &len); 2567 2568 if (cc != DDI_SUCCESS) { 2569 bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES); 2570 } 2571 } 2572 mcp->mlist = NULL; 2573 2574 mc_get_mlist(mcp); 2575 2576 /* initialize bank informations */ 2577 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2578 "mc-addr", (caddr_t)&macaddr, &len); 2579 if (cc != DDI_SUCCESS) { 2580 cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc); 2581 return (DDI_FAILURE); 2582 } 2583 2584 cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS, 2585 "cs-status", (caddr_t)&cs_status, &len1); 2586 2587 if (cc != DDI_SUCCESS) { 2588 if (len > 0) 2589 kmem_free(macaddr, len); 2590 cmn_err(CE_WARN, "Cannot get cs-status. err=%d\n", cc); 2591 return (DDI_FAILURE); 2592 } 2593 /* get the physical board number for a given logical board number */ 2594 mcp->mc_phys_board_num = mc_opl_get_physical_board(mcp->mc_board_num); 2595 2596 if (mcp->mc_phys_board_num < 0) { 2597 if (len > 0) 2598 kmem_free(macaddr, len); 2599 cmn_err(CE_WARN, "Unable to obtain the physical board number"); 2600 return (DDI_FAILURE); 2601 } 2602 2603 mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL); 2604 2605 for (i = 0; i < len1 / sizeof (cs_status_t); i++) { 2606 nbytes += ((uint64_t)cs_status[i].cs_avail_hi << 32) | 2607 ((uint64_t)cs_status[i].cs_avail_low); 2608 } 2609 if (len1 > 0) 2610 kmem_free(cs_status, len1); 2611 nbanks = len / sizeof (struct mc_addr_spec); 2612 2613 if (nbanks > 0) 2614 nbytes /= nbanks; 2615 else { 2616 /* No need to free macaddr because len must be 0 */ 2617 mcp->mc_status |= MC_MEMORYLESS; 2618 return (DDI_SUCCESS); 2619 } 2620 2621 for (i = 0; i < BANKNUM_PER_SB; i++) { 2622 mcp->mc_scf_retry[i] = 0; 2623 mcp->mc_period[i] = 0; 2624 mcp->mc_speedup_period[i] = 0; 2625 } 2626 2627 /* 2628 * Get the memory size here. Let it be B (bytes). 2629 * Let T be the time in u.s. to scan 64 bytes. 2630 * If we want to complete 1 round of scanning in P seconds. 2631 * 2632 * B * T * 10^(-6) = P 2633 * --------------- 2634 * 64 2635 * 2636 * T = P * 64 * 10^6 2637 * ------------- 2638 * B 2639 * 2640 * = P * 64 * 10^6 2641 * ------------- 2642 * B 2643 * 2644 * The timing bits are set in PTRL_CNTL[28:26] where 2645 * 2646 * 0 - 1 m.s 2647 * 1 - 512 u.s. 2648 * 10 - 256 u.s. 2649 * 11 - 128 u.s. 2650 * 100 - 64 u.s. 2651 * 101 - 32 u.s. 2652 * 110 - 0 u.s. 2653 * 111 - reserved. 2654 * 2655 * 2656 * a[0] = 110, a[1] = 101, ... a[6] = 0 2657 * 2658 * cs-status property is int x 7 2659 * 0 - cs# 2660 * 1 - cs-status 2661 * 2 - cs-avail.hi 2662 * 3 - cs-avail.lo 2663 * 4 - dimm-capa.hi 2664 * 5 - dimm-capa.lo 2665 * 6 - #of dimms 2666 */ 2667 2668 if (nbytes > 0) { 2669 int i; 2670 uint64_t ms; 2671 ms = ((uint64_t)mc_scan_period * 64 * 1000000)/nbytes; 2672 mcp->mc_speed = mc_scan_speeds[MC_MAX_SPEEDS - 1].mc_speeds; 2673 for (i = 0; i < MC_MAX_SPEEDS - 1; i++) { 2674 if (ms < mc_scan_speeds[i + 1].mc_period) { 2675 mcp->mc_speed = mc_scan_speeds[i].mc_speeds; 2676 break; 2677 } 2678 } 2679 } else 2680 mcp->mc_speed = 0; 2681 2682 2683 for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) { 2684 struct mc_bank *bankp; 2685 mc_retry_info_t *retry; 2686 uint32_t reg; 2687 int k; 2688 2689 /* 2690 * setup bank 2691 */ 2692 bk = macaddr[i].bank; 2693 bankp = &(mcp->mc_bank[bk]); 2694 bankp->mcb_status = BANK_INSTALLED; 2695 bankp->mcb_reg_base = REGS_PA(macaddr, i); 2696 2697 bankp->mcb_retry_freelist = NULL; 2698 bankp->mcb_retry_pending = NULL; 2699 bankp->mcb_active = NULL; 2700 retry = &bankp->mcb_retry_infos[0]; 2701 for (k = 0; k < MC_RETRY_COUNT; k++, retry++) { 2702 mc_retry_info_put(&bankp->mcb_retry_freelist, retry); 2703 } 2704 2705 reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk)); 2706 bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS); 2707 2708 /* 2709 * check if mirror mode 2710 */ 2711 mirr = LD_MAC_REG(MAC_MIRR(mcp, bk)); 2712 2713 if (mirr & MAC_MIRR_MIRROR_MODE) { 2714 MC_LOG("Mirror -> /LSB%d/B%d\n", mcp->mc_board_num, 2715 bk); 2716 bankp->mcb_status |= BANK_MIRROR_MODE; 2717 mirror_mode = 1; 2718 /* 2719 * The following bit is only used for 2720 * error injection. We should clear it 2721 */ 2722 if (mirr & MAC_MIRR_BANK_EXCLUSIVE) 2723 ST_MAC_REG(MAC_MIRR(mcp, bk), 0); 2724 } 2725 2726 /* 2727 * restart if not mirror mode or the other bank 2728 * of the mirror is not running 2729 */ 2730 if (!(mirr & MAC_MIRR_MIRROR_MODE) || 2731 !(mcp->mc_bank[bk^1].mcb_status & BANK_PTRL_RUNNING)) { 2732 MC_LOG("Starting up /LSB%d/B%d\n", mcp->mc_board_num, 2733 bk); 2734 get_ptrl_start_address(mcp, bk, &rsaddr.mi_restartaddr); 2735 rsaddr.mi_valid = 0; 2736 rsaddr.mi_injectrestart = 0; 2737 restart_patrol(mcp, bk, &rsaddr); 2738 } else { 2739 MC_LOG("Not starting up /LSB%d/B%d\n", 2740 mcp->mc_board_num, bk); 2741 } 2742 bankp->mcb_status |= BANK_PTRL_RUNNING; 2743 } 2744 if (len > 0) 2745 kmem_free(macaddr, len); 2746 2747 ret = ndi_prop_update_int(DDI_DEV_T_NONE, mcp->mc_dip, "mirror-mode", 2748 mirror_mode); 2749 if (ret != DDI_PROP_SUCCESS) { 2750 cmn_err(CE_WARN, "Unable to update mirror-mode property"); 2751 } 2752 2753 mcp->mc_dimm_list = mc_get_dimm_list(mcp); 2754 2755 /* 2756 * set interval in HZ. 2757 */ 2758 mcp->mc_last_error = 0; 2759 2760 /* restart memory patrol checking */ 2761 mcp->mc_status |= MC_POLL_RUNNING; 2762 2763 return (DDI_SUCCESS); 2764 } 2765 2766 int 2767 mc_board_del(mc_opl_t *mcp) 2768 { 2769 int i; 2770 scf_log_t *p; 2771 2772 /* 2773 * cleanup mac state 2774 */ 2775 mutex_enter(&mcp->mc_lock); 2776 if (mcp->mc_status & MC_MEMORYLESS) { 2777 mutex_exit(&mcp->mc_lock); 2778 mutex_destroy(&mcp->mc_lock); 2779 return (DDI_SUCCESS); 2780 } 2781 for (i = 0; i < BANKNUM_PER_SB; i++) { 2782 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2783 mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED; 2784 } 2785 } 2786 2787 /* stop memory patrol checking */ 2788 mcp->mc_status &= ~MC_POLL_RUNNING; 2789 2790 /* just throw away all the scf logs */ 2791 for (i = 0; i < BANKNUM_PER_SB; i++) { 2792 while ((p = mcp->mc_scf_log[i]) != NULL) { 2793 mcp->mc_scf_log[i] = p->sl_next; 2794 mcp->mc_scf_total[i]--; 2795 kmem_free(p, sizeof (scf_log_t)); 2796 } 2797 } 2798 2799 if (mcp->mlist) 2800 mc_memlist_delete(mcp->mlist); 2801 2802 if (mcp->mc_dimm_list) 2803 mc_free_dimm_list(mcp->mc_dimm_list); 2804 2805 mutex_exit(&mcp->mc_lock); 2806 2807 mutex_destroy(&mcp->mc_lock); 2808 return (DDI_SUCCESS); 2809 } 2810 2811 int 2812 mc_suspend(mc_opl_t *mcp, uint32_t flag) 2813 { 2814 /* stop memory patrol checking */ 2815 mutex_enter(&mcp->mc_lock); 2816 if (mcp->mc_status & MC_MEMORYLESS) { 2817 mutex_exit(&mcp->mc_lock); 2818 return (DDI_SUCCESS); 2819 } 2820 2821 mcp->mc_status &= ~MC_POLL_RUNNING; 2822 2823 mcp->mc_status |= flag; 2824 mutex_exit(&mcp->mc_lock); 2825 2826 return (DDI_SUCCESS); 2827 } 2828 2829 void 2830 opl_mc_update_mlist(void) 2831 { 2832 int i; 2833 mc_opl_t *mcp; 2834 2835 /* 2836 * memory information is not updated until 2837 * the post attach/detach stage during DR. 2838 * This interface is used by dr_mem to inform 2839 * mc-opl to update the mlist. 2840 */ 2841 2842 mutex_enter(&mcmutex); 2843 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2844 if ((mcp = mc_instances[i]) == NULL) 2845 continue; 2846 mutex_enter(&mcp->mc_lock); 2847 if (mcp->mlist) 2848 mc_memlist_delete(mcp->mlist); 2849 mcp->mlist = NULL; 2850 mc_get_mlist(mcp); 2851 mutex_exit(&mcp->mc_lock); 2852 } 2853 mutex_exit(&mcmutex); 2854 } 2855 2856 /* caller must clear the SUSPEND bits or this will do nothing */ 2857 2858 int 2859 mc_resume(mc_opl_t *mcp, uint32_t flag) 2860 { 2861 int i; 2862 uint64_t basepa; 2863 2864 mutex_enter(&mcp->mc_lock); 2865 if (mcp->mc_status & MC_MEMORYLESS) { 2866 mutex_exit(&mcp->mc_lock); 2867 return (DDI_SUCCESS); 2868 } 2869 basepa = mcp->mc_start_address; 2870 if (get_base_address(mcp) == DDI_FAILURE) { 2871 mutex_exit(&mcp->mc_lock); 2872 return (DDI_FAILURE); 2873 } 2874 2875 if (basepa != mcp->mc_start_address) { 2876 if (mcp->mlist) 2877 mc_memlist_delete(mcp->mlist); 2878 mcp->mlist = NULL; 2879 mc_get_mlist(mcp); 2880 } 2881 2882 mcp->mc_status &= ~flag; 2883 2884 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 2885 mutex_exit(&mcp->mc_lock); 2886 return (DDI_SUCCESS); 2887 } 2888 2889 if (!(mcp->mc_status & MC_POLL_RUNNING)) { 2890 /* restart memory patrol checking */ 2891 mcp->mc_status |= MC_POLL_RUNNING; 2892 for (i = 0; i < BANKNUM_PER_SB; i++) { 2893 if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) { 2894 mc_check_errors_func(mcp); 2895 } 2896 } 2897 } 2898 mutex_exit(&mcp->mc_lock); 2899 2900 return (DDI_SUCCESS); 2901 } 2902 2903 static mc_opl_t * 2904 mc_pa_to_mcp(uint64_t pa) 2905 { 2906 mc_opl_t *mcp; 2907 int i; 2908 2909 ASSERT(MUTEX_HELD(&mcmutex)); 2910 for (i = 0; i < OPL_MAX_BOARDS; i++) { 2911 if ((mcp = mc_instances[i]) == NULL) 2912 continue; 2913 /* if mac patrol is suspended, we cannot rely on it */ 2914 if (!(mcp->mc_status & MC_POLL_RUNNING) || 2915 (mcp->mc_status & MC_SOFT_SUSPENDED)) 2916 continue; 2917 if (mc_rangecheck_pa(mcp, pa)) { 2918 return (mcp); 2919 } 2920 } 2921 return (NULL); 2922 } 2923 2924 /* 2925 * Get Physical Board number from Logical one. 2926 */ 2927 static int 2928 mc_opl_get_physical_board(int sb) 2929 { 2930 if (&opl_get_physical_board) { 2931 return (opl_get_physical_board(sb)); 2932 } 2933 2934 cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n"); 2935 return (-1); 2936 } 2937 2938 /* ARGSUSED */ 2939 int 2940 mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen, 2941 int *lenp) 2942 { 2943 int i; 2944 int j; 2945 int sb; 2946 int bank; 2947 int cs; 2948 int rv = 0; 2949 mc_opl_t *mcp; 2950 char memb_num; 2951 2952 mutex_enter(&mcmutex); 2953 2954 if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) || 2955 (!pa_is_valid(mcp, flt_addr))) { 2956 mutex_exit(&mcmutex); 2957 if (snprintf(buf, buflen, "UNKNOWN") >= buflen) { 2958 return (ENOSPC); 2959 } else { 2960 if (lenp) 2961 *lenp = strlen(buf); 2962 } 2963 return (0); 2964 } 2965 2966 bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address); 2967 sb = mcp->mc_phys_board_num; 2968 cs = pa_to_cs(mcp, flt_addr - mcp->mc_start_address); 2969 2970 if (sb == -1) { 2971 mutex_exit(&mcmutex); 2972 return (ENXIO); 2973 } 2974 2975 switch (plat_model) { 2976 case MODEL_DC: 2977 i = BD_BK_SLOT_TO_INDEX(0, bank, 0); 2978 j = (cs == 0) ? i : i + 2; 2979 snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s", 2980 model_names[plat_model].unit_name, sb, 2981 mc_dc_dimm_unum_table[j], 2982 mc_dc_dimm_unum_table[j + 1]); 2983 break; 2984 case MODEL_FF2: 2985 case MODEL_FF1: 2986 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 2987 j = (cs == 0) ? i : i + 2; 2988 memb_num = mc_ff_dimm_unum_table[i][0]; 2989 snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s", 2990 model_names[plat_model].unit_name, 2991 model_names[plat_model].mem_name, memb_num, 2992 &mc_ff_dimm_unum_table[j][1], 2993 &mc_ff_dimm_unum_table[j + 1][1]); 2994 break; 2995 case MODEL_IKKAKU: 2996 i = BD_BK_SLOT_TO_INDEX(sb, bank, 0); 2997 j = (cs == 0) ? i : i + 2; 2998 snprintf(buf, buflen, "/%s/MEM%s MEM%s", 2999 model_names[plat_model].unit_name, 3000 &mc_ff_dimm_unum_table[j][1], 3001 &mc_ff_dimm_unum_table[j + 1][1]); 3002 break; 3003 default: 3004 rv = ENXIO; 3005 } 3006 if (lenp) { 3007 *lenp = strlen(buf); 3008 } 3009 mutex_exit(&mcmutex); 3010 return (rv); 3011 } 3012 3013 int 3014 opl_mc_suspend(void) 3015 { 3016 mc_opl_t *mcp; 3017 int i; 3018 3019 mutex_enter(&mcmutex); 3020 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3021 if ((mcp = mc_instances[i]) == NULL) 3022 continue; 3023 mc_suspend(mcp, MC_SOFT_SUSPENDED); 3024 } 3025 mutex_exit(&mcmutex); 3026 3027 return (0); 3028 } 3029 3030 int 3031 opl_mc_resume(void) 3032 { 3033 mc_opl_t *mcp; 3034 int i; 3035 3036 mutex_enter(&mcmutex); 3037 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3038 if ((mcp = mc_instances[i]) == NULL) 3039 continue; 3040 mc_resume(mcp, MC_SOFT_SUSPENDED); 3041 } 3042 mutex_exit(&mcmutex); 3043 3044 return (0); 3045 } 3046 static void 3047 insert_mcp(mc_opl_t *mcp) 3048 { 3049 mutex_enter(&mcmutex); 3050 if (mc_instances[mcp->mc_board_num] != NULL) { 3051 MC_LOG("mc-opl instance for board# %d already exists\n", 3052 mcp->mc_board_num); 3053 } 3054 mc_instances[mcp->mc_board_num] = mcp; 3055 mutex_exit(&mcmutex); 3056 } 3057 3058 static void 3059 delete_mcp(mc_opl_t *mcp) 3060 { 3061 mutex_enter(&mcmutex); 3062 mc_instances[mcp->mc_board_num] = 0; 3063 mutex_exit(&mcmutex); 3064 } 3065 3066 /* Error injection interface */ 3067 3068 static void 3069 mc_lock_va(uint64_t pa, caddr_t new_va) 3070 { 3071 tte_t tte; 3072 3073 vtag_flushpage(new_va, (uint64_t)ksfmmup); 3074 sfmmu_memtte(&tte, pa >> PAGESHIFT, PROC_DATA|HAT_NOSYNC, TTE8K); 3075 tte.tte_intlo |= TTE_LCK_INT; 3076 sfmmu_dtlb_ld_kva(new_va, &tte); 3077 } 3078 3079 static void 3080 mc_unlock_va(caddr_t va) 3081 { 3082 vtag_flushpage(va, (uint64_t)ksfmmup); 3083 } 3084 3085 /* ARGSUSED */ 3086 int 3087 mc_inject_error(int error_type, uint64_t pa, uint32_t flags) 3088 { 3089 mc_opl_t *mcp; 3090 int bank; 3091 uint32_t dimm_addr; 3092 uint32_t cntl; 3093 mc_rsaddr_info_t rsaddr; 3094 uint32_t data, stat; 3095 int both_sides = 0; 3096 uint64_t pa0; 3097 int extra_injection_needed = 0; 3098 extern void cpu_flush_ecache(void); 3099 3100 MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags); 3101 3102 mutex_enter(&mcmutex); 3103 if ((mcp = mc_pa_to_mcp(pa)) == NULL) { 3104 mutex_exit(&mcmutex); 3105 MC_LOG("mc_inject_error: invalid pa\n"); 3106 return (ENOTSUP); 3107 } 3108 3109 mutex_enter(&mcp->mc_lock); 3110 mutex_exit(&mcmutex); 3111 3112 if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) { 3113 mutex_exit(&mcp->mc_lock); 3114 MC_LOG("mc-opl has been suspended. No error injection.\n"); 3115 return (EBUSY); 3116 } 3117 3118 /* convert pa to offset within the board */ 3119 MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address); 3120 3121 if (!pa_is_valid(mcp, pa)) { 3122 mutex_exit(&mcp->mc_lock); 3123 return (EINVAL); 3124 } 3125 3126 pa0 = pa - mcp->mc_start_address; 3127 3128 bank = pa_to_bank(mcp, pa0); 3129 3130 if (flags & MC_INJECT_FLAG_OTHER) 3131 bank = bank ^ 1; 3132 3133 if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) { 3134 mutex_exit(&mcp->mc_lock); 3135 MC_LOG("Not mirror mode\n"); 3136 return (EINVAL); 3137 } 3138 3139 dimm_addr = pa_to_dimm(mcp, pa0); 3140 3141 MC_LOG("injecting error to /LSB%d/B%d/%x\n", mcp->mc_board_num, bank, 3142 dimm_addr); 3143 3144 3145 switch (error_type) { 3146 case MC_INJECT_INTERMITTENT_MCE: 3147 case MC_INJECT_PERMANENT_MCE: 3148 case MC_INJECT_MUE: 3149 both_sides = 1; 3150 } 3151 3152 if (flags & MC_INJECT_FLAG_RESET) 3153 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0); 3154 3155 ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK); 3156 3157 if (both_sides) { 3158 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0); 3159 ST_MAC_REG(MAC_EG_ADD(mcp, bank^1), dimm_addr & 3160 MAC_EG_ADD_MASK); 3161 } 3162 3163 switch (error_type) { 3164 case MC_INJECT_SUE: 3165 extra_injection_needed = 1; 3166 /*FALLTHROUGH*/ 3167 case MC_INJECT_UE: 3168 case MC_INJECT_MUE: 3169 if (flags & MC_INJECT_FLAG_PATH) { 3170 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 | 3171 MAC_EG_FORCE_READ16 | MAC_EG_RDERR_ONCE; 3172 } else { 3173 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR00 | 3174 MAC_EG_FORCE_DERR16 | MAC_EG_DERR_ONCE; 3175 } 3176 flags |= MC_INJECT_FLAG_ST; 3177 break; 3178 case MC_INJECT_INTERMITTENT_CE: 3179 case MC_INJECT_INTERMITTENT_MCE: 3180 if (flags & MC_INJECT_FLAG_PATH) { 3181 cntl = MAC_EG_ADD_FIX |MAC_EG_FORCE_READ00 | 3182 MAC_EG_RDERR_ONCE; 3183 } else { 3184 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 | 3185 MAC_EG_DERR_ONCE; 3186 } 3187 extra_injection_needed = 1; 3188 flags |= MC_INJECT_FLAG_ST; 3189 break; 3190 case MC_INJECT_PERMANENT_CE: 3191 case MC_INJECT_PERMANENT_MCE: 3192 if (flags & MC_INJECT_FLAG_PATH) { 3193 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 | 3194 MAC_EG_RDERR_ALWAYS; 3195 } else { 3196 cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 | 3197 MAC_EG_DERR_ALWAYS; 3198 } 3199 flags |= MC_INJECT_FLAG_ST; 3200 break; 3201 case MC_INJECT_CMPE: 3202 data = 0xabcdefab; 3203 stphys(pa, data); 3204 cpu_flush_ecache(); 3205 MC_LOG("CMPE: writing data %x to %lx\n", data, pa); 3206 ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE); 3207 stphys(pa, data ^ 0xffffffff); 3208 membar_sync(); 3209 cpu_flush_ecache(); 3210 ST_MAC_REG(MAC_MIRR(mcp, bank), 0); 3211 MC_LOG("CMPE: write new data %xto %lx\n", data, pa); 3212 cntl = 0; 3213 break; 3214 case MC_INJECT_NOP: 3215 cntl = 0; 3216 break; 3217 default: 3218 MC_LOG("mc_inject_error: invalid option\n"); 3219 cntl = 0; 3220 } 3221 3222 if (cntl) { 3223 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK); 3224 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 3225 3226 if (both_sides) { 3227 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 3228 MAC_EG_SETUP_MASK); 3229 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 3230 } 3231 } 3232 3233 /* 3234 * For all injection cases except compare error, we 3235 * must write to the PA to trigger the error. 3236 */ 3237 3238 if (flags & MC_INJECT_FLAG_ST) { 3239 data = 0xf0e0d0c0; 3240 MC_LOG("Writing %x to %lx\n", data, pa); 3241 stphys(pa, data); 3242 cpu_flush_ecache(); 3243 } 3244 3245 3246 if (flags & MC_INJECT_FLAG_LD) { 3247 if (flags & MC_INJECT_FLAG_PREFETCH) { 3248 /* 3249 * Use strong prefetch operation to 3250 * inject MI errors. 3251 */ 3252 page_t *pp; 3253 extern void mc_prefetch(caddr_t); 3254 3255 MC_LOG("prefetch\n"); 3256 3257 pp = page_numtopp_nolock(pa >> PAGESHIFT); 3258 if (pp != NULL) { 3259 caddr_t va, va1; 3260 3261 va = ppmapin(pp, PROT_READ|PROT_WRITE, 3262 (caddr_t)-1); 3263 kpreempt_disable(); 3264 mc_lock_va((uint64_t)pa, va); 3265 va1 = va + (pa & (PAGESIZE - 1)); 3266 mc_prefetch(va1); 3267 mc_unlock_va(va); 3268 kpreempt_enable(); 3269 ppmapout(va); 3270 3271 /* 3272 * For MI errors, we need one extra 3273 * injection for HW patrol to stop. 3274 */ 3275 extra_injection_needed = 1; 3276 } else { 3277 cmn_err(CE_WARN, "Cannot find page structure" 3278 " for PA %lx\n", pa); 3279 } 3280 } else { 3281 MC_LOG("Reading from %lx\n", pa); 3282 data = ldphys(pa); 3283 MC_LOG("data = %x\n", data); 3284 } 3285 3286 if (extra_injection_needed) { 3287 /* 3288 * These are the injection cases where the 3289 * requested injected errors will not cause the HW 3290 * patrol to stop. For these cases, we need to inject 3291 * an extra 'real' PTRL error to force the 3292 * HW patrol to stop so that we can report the 3293 * errors injected. Note that we cannot read 3294 * and report error status while the HW patrol 3295 * is running. 3296 */ 3297 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 3298 cntl & MAC_EG_SETUP_MASK); 3299 ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl); 3300 3301 if (both_sides) { 3302 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl & 3303 MAC_EG_SETUP_MASK); 3304 ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl); 3305 } 3306 data = 0xf0e0d0c0; 3307 MC_LOG("Writing %x to %lx\n", data, pa); 3308 stphys(pa, data); 3309 cpu_flush_ecache(); 3310 } 3311 } 3312 3313 if (flags & MC_INJECT_FLAG_RESTART) { 3314 MC_LOG("Restart patrol\n"); 3315 rsaddr.mi_restartaddr.ma_bd = mcp->mc_board_num; 3316 rsaddr.mi_restartaddr.ma_bank = bank; 3317 rsaddr.mi_restartaddr.ma_dimm_addr = dimm_addr; 3318 rsaddr.mi_valid = 1; 3319 rsaddr.mi_injectrestart = 1; 3320 restart_patrol(mcp, bank, &rsaddr); 3321 } 3322 3323 if (flags & MC_INJECT_FLAG_POLL) { 3324 int running; 3325 int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank; 3326 3327 MC_LOG("Poll patrol error\n"); 3328 stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank)); 3329 cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)); 3330 running = cntl & MAC_CNTL_PTRL_START; 3331 3332 if (!running && 3333 (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS))) { 3334 /* 3335 * HW patrol stopped and we have errors to 3336 * report. Do it. 3337 */ 3338 mcp->mc_speedup_period[ebank] = 0; 3339 rsaddr.mi_valid = 0; 3340 rsaddr.mi_injectrestart = 0; 3341 if (IS_MIRROR(mcp, bank)) { 3342 mc_error_handler_mir(mcp, bank, &rsaddr); 3343 } else { 3344 mc_error_handler(mcp, bank, &rsaddr); 3345 } 3346 3347 restart_patrol(mcp, bank, &rsaddr); 3348 } else { 3349 /* 3350 * We are expecting to report injected 3351 * errors but the HW patrol is still running. 3352 * Speed up the scanning 3353 */ 3354 mcp->mc_speedup_period[ebank] = 2; 3355 MAC_CMD(mcp, bank, 0); 3356 restart_patrol(mcp, bank, NULL); 3357 } 3358 } 3359 3360 mutex_exit(&mcp->mc_lock); 3361 return (0); 3362 } 3363 3364 void 3365 mc_stphysio(uint64_t pa, uint32_t data) 3366 { 3367 MC_LOG("0x%x -> pa(%lx)\n", data, pa); 3368 stphysio(pa, data); 3369 3370 /* force the above write to be processed by mac patrol */ 3371 data = ldphysio(pa); 3372 MC_LOG("pa(%lx) = 0x%x\n", pa, data); 3373 } 3374 3375 uint32_t 3376 mc_ldphysio(uint64_t pa) 3377 { 3378 uint32_t rv; 3379 3380 rv = ldphysio(pa); 3381 MC_LOG("pa(%lx) = 0x%x\n", pa, rv); 3382 return (rv); 3383 } 3384 3385 #define isdigit(ch) ((ch) >= '0' && (ch) <= '9') 3386 3387 /* 3388 * parse_unum_memory -- extract the board number and the DIMM name from 3389 * the unum. 3390 * 3391 * Return 0 for success and non-zero for a failure. 3392 */ 3393 int 3394 parse_unum_memory(char *unum, int *board, char *dname) 3395 { 3396 char *c; 3397 char x, y, z; 3398 3399 if ((c = strstr(unum, "CMU")) != NULL) { 3400 /* DC Model */ 3401 c += 3; 3402 *board = (uint8_t)stoi(&c); 3403 if ((c = strstr(c, "MEM")) == NULL) { 3404 return (1); 3405 } 3406 c += 3; 3407 if (strlen(c) < 3) { 3408 return (2); 3409 } 3410 if ((!isdigit(c[0])) || (!(isdigit(c[1]))) || 3411 ((c[2] != 'A') && (c[2] != 'B'))) { 3412 return (3); 3413 } 3414 x = c[0]; 3415 y = c[1]; 3416 z = c[2]; 3417 } else if ((c = strstr(unum, "MBU_")) != NULL) { 3418 /* FF1/FF2/Ikkaku Model */ 3419 c += 4; 3420 if ((c[0] != 'A') && (c[0] != 'B')) { 3421 return (4); 3422 } 3423 if (plat_model == MODEL_IKKAKU) { 3424 /* Ikkaku Model */ 3425 x = '0'; 3426 *board = 0; 3427 } else { 3428 /* FF1/FF2 Model */ 3429 if ((c = strstr(c, "MEMB")) == NULL) { 3430 return (5); 3431 } 3432 c += 4; 3433 3434 x = c[0]; 3435 *board = ((uint8_t)stoi(&c)) / 4; 3436 } 3437 3438 if ((c = strstr(c, "MEM")) == NULL) { 3439 return (6); 3440 } 3441 c += 3; 3442 if (strlen(c) < 2) { 3443 return (7); 3444 } 3445 if ((!isdigit(c[0])) || ((c[1] != 'A') && (c[1] != 'B'))) { 3446 return (8); 3447 } 3448 y = c[0]; 3449 z = c[1]; 3450 } else { 3451 return (9); 3452 } 3453 if (*board < 0) { 3454 return (10); 3455 } 3456 dname[0] = x; 3457 dname[1] = y; 3458 dname[2] = z; 3459 dname[3] = '\0'; 3460 return (0); 3461 } 3462 3463 /* 3464 * mc_get_mem_sid_dimm -- Get the serial-ID for a given board and 3465 * the DIMM name. 3466 */ 3467 int 3468 mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf, 3469 int buflen, int *lenp) 3470 { 3471 int ret = ENODEV; 3472 mc_dimm_info_t *d = NULL; 3473 3474 if ((d = mcp->mc_dimm_list) == NULL) 3475 return (ENOTSUP); 3476 3477 for (; d != NULL; d = d->md_next) { 3478 if (strcmp(d->md_dimmname, dname) == 0) { 3479 break; 3480 } 3481 } 3482 if (d != NULL) { 3483 *lenp = strlen(d->md_serial) + strlen(d->md_partnum); 3484 if (buflen <= *lenp) { 3485 cmn_err(CE_WARN, "mc_get_mem_sid_dimm: " 3486 "buflen is smaller than %d\n", *lenp); 3487 ret = ENOSPC; 3488 } else { 3489 snprintf(buf, buflen, "%s:%s", 3490 d->md_serial, d->md_partnum); 3491 ret = 0; 3492 } 3493 } 3494 MC_LOG("mc_get_mem_sid_dimm: Ret=%d Name=%s Serial-ID=%s\n", 3495 ret, dname, (ret == 0) ? buf : ""); 3496 return (ret); 3497 } 3498 3499 int 3500 mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int sb, 3501 int bank, uint32_t mf_type, uint32_t d_slot) 3502 { 3503 int lenp = buflen; 3504 int id; 3505 int ret; 3506 char *dimmnm; 3507 3508 if (mf_type == FLT_TYPE_INTERMITTENT_CE || 3509 mf_type == FLT_TYPE_PERMANENT_CE) { 3510 if (plat_model == MODEL_DC) { 3511 /* 3512 * All DC models 3513 */ 3514 id = BD_BK_SLOT_TO_INDEX(0, bank, d_slot); 3515 dimmnm = mc_dc_dimm_unum_table[id]; 3516 } else { 3517 /* 3518 * All FF and Ikkaku models 3519 */ 3520 id = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot); 3521 dimmnm = mc_ff_dimm_unum_table[id]; 3522 } 3523 if ((ret = mc_get_mem_sid_dimm(mcp, dimmnm, buf, buflen, 3524 &lenp)) != 0) { 3525 return (ret); 3526 } 3527 } else { 3528 return (1); 3529 } 3530 3531 return (0); 3532 } 3533 3534 /* 3535 * mc_get_mem_sid -- get the DIMM serial-ID corresponding to the unum. 3536 */ 3537 int 3538 mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 3539 { 3540 int i; 3541 int ret = ENODEV; 3542 int board; 3543 char dname[MCOPL_MAX_DIMMNAME + 1]; 3544 mc_opl_t *mcp; 3545 3546 MC_LOG("mc_get_mem_sid: unum=%s buflen=%d\n", unum, buflen); 3547 if ((ret = parse_unum_memory(unum, &board, dname)) != 0) { 3548 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3549 unum, ret); 3550 return (EINVAL); 3551 } 3552 3553 if (board < 0) { 3554 MC_LOG("mc_get_mem_sid: Invalid board=%d dimm=%s\n", 3555 board, dname); 3556 return (EINVAL); 3557 } 3558 3559 mutex_enter(&mcmutex); 3560 /* 3561 * return ENOENT if we can not find the matching board. 3562 */ 3563 ret = ENOENT; 3564 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3565 if ((mcp = mc_instances[i]) == NULL) 3566 continue; 3567 mutex_enter(&mcp->mc_lock); 3568 if (mcp->mc_phys_board_num != board) { 3569 mutex_exit(&mcp->mc_lock); 3570 continue; 3571 } 3572 ret = mc_get_mem_sid_dimm(mcp, dname, buf, buflen, lenp); 3573 if (ret == 0) { 3574 mutex_exit(&mcp->mc_lock); 3575 break; 3576 } 3577 mutex_exit(&mcp->mc_lock); 3578 } 3579 mutex_exit(&mcmutex); 3580 return (ret); 3581 } 3582 3583 /* 3584 * mc_get_mem_offset -- get the offset in a DIMM for a given physical address. 3585 */ 3586 int 3587 mc_get_mem_offset(uint64_t paddr, uint64_t *offp) 3588 { 3589 int i; 3590 int ret = ENODEV; 3591 mc_addr_t maddr; 3592 mc_opl_t *mcp; 3593 3594 mutex_enter(&mcmutex); 3595 for (i = 0; ((i < OPL_MAX_BOARDS) && (ret != 0)); i++) { 3596 if ((mcp = mc_instances[i]) == NULL) 3597 continue; 3598 mutex_enter(&mcp->mc_lock); 3599 if (!pa_is_valid(mcp, paddr)) { 3600 mutex_exit(&mcp->mc_lock); 3601 continue; 3602 } 3603 if (pa_to_maddr(mcp, paddr, &maddr) == 0) { 3604 *offp = maddr.ma_dimm_addr; 3605 ret = 0; 3606 } 3607 mutex_exit(&mcp->mc_lock); 3608 } 3609 mutex_exit(&mcmutex); 3610 MC_LOG("mc_get_mem_offset: Ret=%d paddr=0x%lx offset=0x%lx\n", 3611 ret, paddr, *offp); 3612 return (ret); 3613 } 3614 3615 /* 3616 * dname_to_bankslot - Get the bank and slot number from the DIMM name. 3617 */ 3618 int 3619 dname_to_bankslot(char *dname, int *bank, int *slot) 3620 { 3621 int i; 3622 int tsz; 3623 char **tbl; 3624 3625 if (plat_model == MODEL_DC) { 3626 /* 3627 * All DC models 3628 */ 3629 tbl = mc_dc_dimm_unum_table; 3630 tsz = OPL_MAX_DIMMS; 3631 } else { 3632 /* 3633 * All FF and Ikkaku models 3634 */ 3635 tbl = mc_ff_dimm_unum_table; 3636 tsz = 2 * OPL_MAX_DIMMS; 3637 } 3638 3639 for (i = 0; i < tsz; i++) { 3640 if (strcmp(dname, tbl[i]) == 0) { 3641 break; 3642 } 3643 } 3644 if (i == tsz) { 3645 return (1); 3646 } 3647 *bank = INDEX_TO_BANK(i); 3648 *slot = INDEX_TO_SLOT(i); 3649 return (0); 3650 } 3651 3652 /* 3653 * mc_get_mem_addr -- get the physical address of a DIMM corresponding 3654 * to the unum and sid. 3655 */ 3656 int 3657 mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr) 3658 { 3659 int board; 3660 int bank; 3661 int slot; 3662 int i; 3663 int ret = ENODEV; 3664 char dname[MCOPL_MAX_DIMMNAME + 1]; 3665 mc_addr_t maddr; 3666 mc_opl_t *mcp; 3667 3668 MC_LOG("mc_get_mem_addr: unum=%s sid=%s offset=0x%lx\n", 3669 unum, sid, offset); 3670 if (parse_unum_memory(unum, &board, dname) != 0) { 3671 MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n", 3672 unum, ret); 3673 return (EINVAL); 3674 } 3675 3676 if (board < 0) { 3677 MC_LOG("mc_get_mem_addr: Invalid board=%d dimm=%s\n", 3678 board, dname); 3679 return (EINVAL); 3680 } 3681 3682 mutex_enter(&mcmutex); 3683 for (i = 0; i < OPL_MAX_BOARDS; i++) { 3684 if ((mcp = mc_instances[i]) == NULL) 3685 continue; 3686 mutex_enter(&mcp->mc_lock); 3687 if (mcp->mc_phys_board_num != board) { 3688 mutex_exit(&mcp->mc_lock); 3689 continue; 3690 } 3691 3692 ret = dname_to_bankslot(dname, &bank, &slot); 3693 MC_LOG("mc_get_mem_addr: bank=%d slot=%d\n", bank, slot); 3694 if (ret != 0) { 3695 MC_LOG("mc_get_mem_addr: dname_to_bankslot failed\n"); 3696 ret = ENODEV; 3697 } else { 3698 maddr.ma_bd = mcp->mc_board_num; 3699 maddr.ma_bank = bank; 3700 maddr.ma_dimm_addr = offset; 3701 ret = mcaddr_to_pa(mcp, &maddr, paddr); 3702 if (ret != 0) { 3703 MC_LOG("mc_get_mem_addr: " 3704 "mcaddr_to_pa failed\n"); 3705 ret = ENODEV; 3706 } 3707 mutex_exit(&mcp->mc_lock); 3708 break; 3709 } 3710 mutex_exit(&mcp->mc_lock); 3711 } 3712 mutex_exit(&mcmutex); 3713 MC_LOG("mc_get_mem_addr: Ret=%d, Paddr=0x%lx\n", ret, *paddr); 3714 return (ret); 3715 } 3716 3717 static void 3718 mc_free_dimm_list(mc_dimm_info_t *d) 3719 { 3720 mc_dimm_info_t *next; 3721 3722 while (d != NULL) { 3723 next = d->md_next; 3724 kmem_free(d, sizeof (mc_dimm_info_t)); 3725 d = next; 3726 } 3727 } 3728 3729 /* 3730 * mc_get_dimm_list -- get the list of dimms with serial-id info 3731 * from the SP. 3732 */ 3733 mc_dimm_info_t * 3734 mc_get_dimm_list(mc_opl_t *mcp) 3735 { 3736 uint32_t bufsz; 3737 uint32_t maxbufsz; 3738 int ret; 3739 int sexp; 3740 board_dimm_info_t *bd_dimmp; 3741 mc_dimm_info_t *dimm_list = NULL; 3742 3743 maxbufsz = bufsz = sizeof (board_dimm_info_t) + 3744 ((MCOPL_MAX_DIMMNAME + MCOPL_MAX_SERIAL + 3745 MCOPL_MAX_PARTNUM) * OPL_MAX_DIMMS); 3746 3747 bd_dimmp = (board_dimm_info_t *)kmem_alloc(bufsz, KM_SLEEP); 3748 ret = scf_get_dimminfo(mcp->mc_board_num, (void *)bd_dimmp, &bufsz); 3749 3750 MC_LOG("mc_get_dimm_list: scf_service_getinfo returned=%d\n", ret); 3751 if (ret == 0) { 3752 sexp = sizeof (board_dimm_info_t) + 3753 ((bd_dimmp->bd_dnamesz + bd_dimmp->bd_serialsz + 3754 bd_dimmp->bd_partnumsz) * bd_dimmp->bd_numdimms); 3755 3756 if ((bd_dimmp->bd_version == OPL_DIMM_INFO_VERSION) && 3757 (bd_dimmp->bd_dnamesz <= MCOPL_MAX_DIMMNAME) && 3758 (bd_dimmp->bd_serialsz <= MCOPL_MAX_SERIAL) && 3759 (bd_dimmp->bd_partnumsz <= MCOPL_MAX_PARTNUM) && 3760 (sexp <= bufsz)) { 3761 3762 #ifdef DEBUG 3763 if (oplmc_debug) 3764 mc_dump_dimm_info(bd_dimmp); 3765 #endif 3766 dimm_list = mc_prepare_dimmlist(bd_dimmp); 3767 3768 } else { 3769 cmn_err(CE_WARN, "DIMM info version mismatch\n"); 3770 } 3771 } 3772 kmem_free(bd_dimmp, maxbufsz); 3773 MC_LOG("mc_get_dimm_list: dimmlist=0x%p\n", dimm_list); 3774 return (dimm_list); 3775 } 3776 3777 /* 3778 * mc_prepare_dimmlist - Prepare the dimm list from the information 3779 * received from the SP. 3780 */ 3781 mc_dimm_info_t * 3782 mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp) 3783 { 3784 char *dimm_name; 3785 char *serial; 3786 char *part; 3787 int dimm; 3788 int dnamesz = bd_dimmp->bd_dnamesz; 3789 int sersz = bd_dimmp->bd_serialsz; 3790 int partsz = bd_dimmp->bd_partnumsz; 3791 mc_dimm_info_t *dimm_list = NULL; 3792 mc_dimm_info_t *d; 3793 3794 dimm_name = (char *)(bd_dimmp + 1); 3795 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3796 3797 d = (mc_dimm_info_t *)kmem_alloc(sizeof (mc_dimm_info_t), 3798 KM_SLEEP); 3799 3800 bcopy(dimm_name, d->md_dimmname, dnamesz); 3801 d->md_dimmname[dnamesz] = 0; 3802 3803 serial = dimm_name + dnamesz; 3804 bcopy(serial, d->md_serial, sersz); 3805 d->md_serial[sersz] = 0; 3806 3807 part = serial + sersz; 3808 bcopy(part, d->md_partnum, partsz); 3809 d->md_partnum[partsz] = 0; 3810 3811 d->md_next = dimm_list; 3812 dimm_list = d; 3813 dimm_name = part + partsz; 3814 } 3815 return (dimm_list); 3816 } 3817 3818 #ifdef DEBUG 3819 void 3820 mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz) 3821 { 3822 char dname[MCOPL_MAX_DIMMNAME + 1]; 3823 char serial[MCOPL_MAX_SERIAL + 1]; 3824 char part[ MCOPL_MAX_PARTNUM + 1]; 3825 char *b; 3826 3827 b = buf; 3828 bcopy(b, dname, dnamesz); 3829 dname[dnamesz] = 0; 3830 3831 b += dnamesz; 3832 bcopy(b, serial, serialsz); 3833 serial[serialsz] = 0; 3834 3835 b += serialsz; 3836 bcopy(b, part, partnumsz); 3837 part[partnumsz] = 0; 3838 3839 printf("DIMM=%s Serial=%s PartNum=%s\n", dname, serial, part); 3840 } 3841 3842 void 3843 mc_dump_dimm_info(board_dimm_info_t *bd_dimmp) 3844 { 3845 int dimm; 3846 int dnamesz = bd_dimmp->bd_dnamesz; 3847 int sersz = bd_dimmp->bd_serialsz; 3848 int partsz = bd_dimmp->bd_partnumsz; 3849 char *buf; 3850 3851 printf("Version=%d Board=%02d DIMMs=%d NameSize=%d " 3852 "SerialSize=%d PartnumSize=%d\n", bd_dimmp->bd_version, 3853 bd_dimmp->bd_boardnum, bd_dimmp->bd_numdimms, bd_dimmp->bd_dnamesz, 3854 bd_dimmp->bd_serialsz, bd_dimmp->bd_partnumsz); 3855 printf("======================================================\n"); 3856 3857 buf = (char *)(bd_dimmp + 1); 3858 for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) { 3859 mc_dump_dimm(buf, dnamesz, sersz, partsz); 3860 buf += dnamesz + sersz + partsz; 3861 } 3862 printf("======================================================\n"); 3863 } 3864 3865 3866 /* ARGSUSED */ 3867 static int 3868 mc_ioctl_debug(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 3869 int *rvalp) 3870 { 3871 caddr_t buf; 3872 uint64_t pa; 3873 int rv = 0; 3874 int i; 3875 uint32_t flags; 3876 static uint32_t offset = 0; 3877 3878 3879 flags = (cmd >> 4) & 0xfffffff; 3880 3881 cmd &= 0xf; 3882 3883 MC_LOG("mc_ioctl(cmd = %x, flags = %x)\n", cmd, flags); 3884 3885 if (arg != NULL) { 3886 if (ddi_copyin((const void *)arg, (void *)&pa, 3887 sizeof (uint64_t), 0) < 0) { 3888 rv = EFAULT; 3889 return (rv); 3890 } 3891 buf = NULL; 3892 } else { 3893 buf = (caddr_t)kmem_alloc(PAGESIZE, KM_SLEEP); 3894 3895 pa = va_to_pa(buf); 3896 pa += offset; 3897 3898 offset += 64; 3899 if (offset >= PAGESIZE) 3900 offset = 0; 3901 } 3902 3903 switch (cmd) { 3904 case MCI_CE: 3905 mc_inject_error(MC_INJECT_INTERMITTENT_CE, pa, flags); 3906 break; 3907 case MCI_PERM_CE: 3908 mc_inject_error(MC_INJECT_PERMANENT_CE, pa, flags); 3909 break; 3910 case MCI_UE: 3911 mc_inject_error(MC_INJECT_UE, pa, flags); 3912 break; 3913 case MCI_M_CE: 3914 mc_inject_error(MC_INJECT_INTERMITTENT_MCE, pa, flags); 3915 break; 3916 case MCI_M_PCE: 3917 mc_inject_error(MC_INJECT_PERMANENT_MCE, pa, flags); 3918 break; 3919 case MCI_M_UE: 3920 mc_inject_error(MC_INJECT_MUE, pa, flags); 3921 break; 3922 case MCI_CMP: 3923 mc_inject_error(MC_INJECT_CMPE, pa, flags); 3924 break; 3925 case MCI_NOP: 3926 mc_inject_error(MC_INJECT_NOP, pa, flags); break; 3927 case MCI_SHOW_ALL: 3928 mc_debug_show_all = 1; 3929 break; 3930 case MCI_SHOW_NONE: 3931 mc_debug_show_all = 0; 3932 break; 3933 case MCI_ALLOC: 3934 /* 3935 * just allocate some kernel memory and never free it 3936 * 512 MB seems to be the maximum size supported. 3937 */ 3938 cmn_err(CE_NOTE, "Allocating kmem %d MB\n", flags * 512); 3939 for (i = 0; i < flags; i++) { 3940 buf = kmem_alloc(512 * 1024 * 1024, KM_SLEEP); 3941 cmn_err(CE_NOTE, "kmem buf %llx PA %llx\n", 3942 (u_longlong_t)buf, (u_longlong_t)va_to_pa(buf)); 3943 } 3944 break; 3945 case MCI_SUSPEND: 3946 (void) opl_mc_suspend(); 3947 break; 3948 case MCI_RESUME: 3949 (void) opl_mc_resume(); 3950 break; 3951 default: 3952 rv = ENXIO; 3953 } 3954 return (rv); 3955 } 3956 3957 #endif /* DEBUG */ 3958