/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma dictionary "INTEL"

/*
 * Eversholt rules for the intel CPU/Memory
 */

/*
 * Ereports for Simple error codes.
 *
 * SMPL_EVENT(leafclass, t) declares the event
 * ereport.cpu.intel.<leafclass>@chip/core/strand with a within(t)
 * constraint.
 */

#define SMPL_EVENT(leafclass, t) \
    event ereport.cpu.intel.leafclass@chip/core/strand { within(t) }

SMPL_EVENT(unknown, 1s);
SMPL_EVENT(unclassified, 1s);
SMPL_EVENT(microcode_rom_parity, 1s);
SMPL_EVENT(external, 1s);
SMPL_EVENT(frc, 1s);
SMPL_EVENT(internal_timer, 1s);
SMPL_EVENT(internal_parity, 1s);
SMPL_EVENT(internal_unclassified, 1s);

/*
 * Propagations for all but "external" and "unknown" simple errors.
 * If the error is uncorrected we produce a fault immediately, otherwise
 * we diagnose it to an upset and declare a fault when the SERD engine
 * trips.
 *
 * The constraint below calls setserdincrement(4) when the payload member
 * "error_uncorrected" equals 1; 4 is larger than the engine's N=3.  For
 * any other value the constraint is simply 1 (true).
 */

engine serd.cpu.intel.simple@chip/core/strand, N=3, T=72h;
event fault.cpu.intel.internal@chip/core/strand,
    engine=serd.cpu.intel.simple@chip/core/strand;

prop fault.cpu.intel.internal@chip/core/strand
    { payloadprop("error_uncorrected") == 1 ? setserdincrement(4) : 1} (0)->
    ereport.cpu.intel.microcode_rom_parity@chip/core/strand,
    ereport.cpu.intel.internal_timer@chip/core/strand,
    ereport.cpu.intel.internal_parity@chip/core/strand,
    ereport.cpu.intel.unclassified@chip/core/strand,
    ereport.cpu.intel.internal_unclassified@chip/core/strand,
    ereport.cpu.intel.frc@chip/core/strand;

/*
 * Ereports for Compound error codes.  These are in pairs "foo" and "foo_uc"
 * for the corrected and uncorrected version of each error type.  All are
 * detected at chip/core/strand.
 *
 * CMPND_EVENT(leafclass, t) declares both members of the pair.  The empty
 * comment between "leafclass" and "_uc" in the macro body is what joins
 * the two into a single name after preprocessing; do not remove it.
 */

#define CMPND_EVENT(leafclass, t) \
    event ereport.cpu.intel.leafclass@chip/core/strand { within(t) }; \
    event ereport.cpu.intel.leafclass/**/_uc@chip/core/strand { within(t) }

/*
 * Ereports for Compound error codes - intel errors
 */
CMPND_EVENT(l0cache, 1s);
CMPND_EVENT(l1cache, 1s);
CMPND_EVENT(l2cache, 1s);
CMPND_EVENT(cache, 1s);

/*
 * Ereports for Compound error codes - TLB errors
 */
CMPND_EVENT(l0dtlb, 1s);
CMPND_EVENT(l1dtlb, 1s);
CMPND_EVENT(l2dtlb, 1s);
CMPND_EVENT(dtlb, 1s);

CMPND_EVENT(l0itlb, 1s);
CMPND_EVENT(l1itlb, 1s);
CMPND_EVENT(l2itlb, 1s);
CMPND_EVENT(itlb, 1s);

CMPND_EVENT(l0tlb, 1s);
CMPND_EVENT(l1tlb, 1s);
CMPND_EVENT(l2tlb, 1s);
CMPND_EVENT(tlb, 1s);

/*
 * Ereports for Compound error codes - memory hierarchy errors
 */
CMPND_EVENT(l0dcache, 1s);
CMPND_EVENT(l1dcache, 1s);
CMPND_EVENT(l2dcache, 1s);
CMPND_EVENT(dcache, 1s);

CMPND_EVENT(l0icache, 1s);
CMPND_EVENT(l1icache, 1s);
CMPND_EVENT(l2icache, 1s);
CMPND_EVENT(icache, 1s);

/*
 * Ereports for Compound error codes - bus and interconnect errors
 */
CMPND_EVENT(bus_interconnect, 1s);
CMPND_EVENT(bus_interconnect_memory, 1s);
CMPND_EVENT(bus_interconnect_io, 1s);

/*
 * Compound error propagations.
 *
 * We resist the temptation to propagate, for example, a single dcache
 * fault to all ereports mentioning dcache (l0dcache, l1dcache, l2dcache,
 * dcache).  Instead we will diagnose a distinct fault for each possible
 * cache level, whether or not current chips have dcaches at all levels.
 *
 * Corrected errors are SERDed and produce a fault when the engine fires;
 * the same fault is diagnosed immediately for a corresponding uncorrected
 * error.
 *
 * Both macros take:
 *   erptleaf - leaf of the ereport class pair (erptleaf and erptleaf_uc)
 *   fltleaf  - leaf of the fault class and of its SERD engine
 *   n, t     - the engine's N and T
 * The uncorrected ereport is tied to the fault with setserdincrement(n + 1),
 * one more than the engine's N.
 *
 * CMPND_FLT_PROP_2 differs from CMPND_FLT_PROP_1 only in declaring the
 * fault with retire=0, response=0.
 */

#define CMPND_FLT_PROP_1(erptleaf, fltleaf, n, t) \
    engine serd.cpu.intel.fltleaf@chip/core/strand, N=n, T=t; \
    event fault.cpu.intel.fltleaf@chip/core/strand, \
        engine=serd.cpu.intel.fltleaf@chip/core/strand; \
    \
    prop fault.cpu.intel.fltleaf@chip/core/strand (0)-> \
        ereport.cpu.intel.erptleaf@chip/core/strand; \
    \
    prop fault.cpu.intel.fltleaf@chip/core/strand \
        { setserdincrement(n + 1) } (0)-> \
        ereport.cpu.intel.erptleaf/**/_uc@chip/core/strand

#define CMPND_FLT_PROP_2(erptleaf, fltleaf, n, t) \
    engine serd.cpu.intel.fltleaf@chip/core/strand, N=n, T=t; \
    event fault.cpu.intel.fltleaf@chip/core/strand, retire=0, response=0,\
        engine=serd.cpu.intel.fltleaf@chip/core/strand; \
    \
    prop fault.cpu.intel.fltleaf@chip/core/strand (0)-> \
        ereport.cpu.intel.erptleaf@chip/core/strand; \
    \
    prop fault.cpu.intel.fltleaf@chip/core/strand \
        { setserdincrement(n + 1) } (0)-> \
        ereport.cpu.intel.erptleaf/**/_uc@chip/core/strand

CMPND_FLT_PROP_1(l0cache, l0cache, 3, 72h);
CMPND_FLT_PROP_1(l1cache, l1cache, 3, 72h);
CMPND_FLT_PROP_1(l2cache, l2cache, 3, 72h);
CMPND_FLT_PROP_1(cache, cache, 12, 72h);

CMPND_FLT_PROP_1(l0dtlb, l0dtlb, 3, 72h);
CMPND_FLT_PROP_1(l1dtlb, l1dtlb, 3, 72h);
CMPND_FLT_PROP_1(l2dtlb, l2dtlb, 3, 72h);
CMPND_FLT_PROP_1(dtlb, dtlb, 12, 72h);

CMPND_FLT_PROP_1(l0itlb, l0itlb, 3, 72h);
CMPND_FLT_PROP_1(l1itlb, l1itlb, 3, 72h);
CMPND_FLT_PROP_1(l2itlb, l2itlb, 3, 72h);
CMPND_FLT_PROP_1(itlb, itlb, 12, 72h);

/*
 * NOTE(review): the three rows below all use the fault leaf "litlb",
 * whereas every other row uses the same leaf for the ereport and the
 * fault.  This looks like it may be a typo for l0tlb/l1tlb/l2tlb, but it
 * is left unchanged because it determines the fault class name -- confirm
 * against the INTEL dictionary before altering it.
 */
CMPND_FLT_PROP_1(l0tlb, litlb, 3, 72h);
CMPND_FLT_PROP_1(l1tlb, litlb, 3, 72h);
CMPND_FLT_PROP_1(l2tlb, litlb, 3, 72h);
CMPND_FLT_PROP_1(tlb, tlb, 12, 72h);

CMPND_FLT_PROP_1(l0dcache, l0dcache, 3, 72h);
CMPND_FLT_PROP_1(l1dcache, l1dcache, 3, 72h);
CMPND_FLT_PROP_1(l2dcache, l2dcache, 3, 72h);
CMPND_FLT_PROP_1(dcache, dcache, 12, 72h);

CMPND_FLT_PROP_1(l0icache, l0icache, 3, 72h);
CMPND_FLT_PROP_1(l1icache, l1icache, 3, 72h);
CMPND_FLT_PROP_1(l2icache, l2icache, 3, 72h);
CMPND_FLT_PROP_1(icache, icache, 12, 72h);

CMPND_FLT_PROP_2(bus_interconnect, bus_interconnect, 10, 72h);
CMPND_FLT_PROP_2(bus_interconnect_memory, bus_interconnect_memory, 10, 72h);
CMPND_FLT_PROP_2(bus_interconnect_io, bus_interconnect_io, 10, 72h);

/*
 * The "external" and "unknown" simple ereports are tied only to a
 * discard upset.
 */
event upset.discard@chip/core/strand;

prop upset.discard@chip/core/strand (0)->
    ereport.cpu.intel.external@chip/core/strand,
    ereport.cpu.intel.unknown@chip/core/strand;

/* errors detected in northbridge */


/*
 * SET_ADDR and SET_OFFSET are used to set a payload value in the fault that
 * we diagnose for page faults, to record the physical address of the faulting
 * page.  Each evaluates true when the source payload member is absent, so
 * a missing member does not by itself fail the enclosing constraint.
 */
#define SET_ADDR (!payloadprop_defined("physaddr") || \
    setpayloadprop("asru-physaddr", payloadprop("physaddr")))

#define SET_OFFSET (!payloadprop_defined("offset") || \
    setpayloadprop("asru-offset", payloadprop("offset")))

/*
 * The list of CPU bus/interconnect ereports (plus "external") that the
 * rules below attach, with (0)->, to faults and upsets diagnosed from
 * other ereports.
 */
#define EREPORT_BUS_ERROR \
    ereport.cpu.intel.bus_interconnect_memory_uc@chip/core/strand, \
    ereport.cpu.intel.bus_interconnect_uc@chip/core/strand, \
    ereport.cpu.intel.bus_interconnect_memory@chip/core/strand, \
    ereport.cpu.intel.bus_interconnect@chip/core/strand, \
    ereport.cpu.intel.external@chip/core/strand

engine stat.ce_pgflt@memory-controller/dram-channel/dimm;

/*
 * Uncorrectable memory errors: the ereports are detected at the memory
 * controller; the page and dimm faults are selected on the rank whose
 * number matches the "rank" payload member.
 */
event ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller{within(12s)};
event ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller{within(12s)};
event fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank,
    message=0, response=0;
event fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel/dimm/rank;

prop fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") &&
    (payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
    SET_ADDR && SET_OFFSET } (1)->
    ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller;

prop fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel<channel_num>/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") } (1)->
    ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller;

event upset.memory.intel.discard@motherboard/memory-controller{within(1s)};

prop upset.memory.intel.discard@motherboard/memory-controller (0)->
    ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller;

prop upset.memory.intel.discard@motherboard/memory-controller (0)->
    EREPORT_BUS_ERROR;

/*
 * SERD parameters for correctable memory errors: per-page and default
 * per-dimm.
 */
#define PAGE_CE_COUNT 2
#define PAGE_CE_TIME 72h
#define DIMM_CE_COUNT 10
#define DIMM_CE_TIME 1week

event ereport.cpu.intel.nb.mem_ce@
    motherboard/memory-controller/dram-channel/dimm/rank{within(12s)};

engine serd.memory.intel.page_ce@
    motherboard/memory-controller/dram-channel/dimm/rank,
    N=PAGE_CE_COUNT, T=PAGE_CE_TIME;
event fault.memory.intel.page_ce@
    motherboard/memory-controller/dram-channel/dimm/rank, message=0, response=0,
    count=stat.ce_pgflt@motherboard/memory-controller/dram-channel/dimm,
    engine=serd.memory.intel.page_ce@
    motherboard/memory-controller/dram-channel/dimm/rank;
prop fault.memory.intel.page_ce@
    motherboard/memory-controller/dram-channel/dimm/rank
    { (payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
    SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.intel.nb.mem_ce@
    motherboard/memory-controller/dram-channel/dimm/rank;

engine serd.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank,
    N=DIMM_CE_COUNT, T=DIMM_CE_TIME;
event fault.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank,
    engine=serd.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank;
event error.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm;
prop fault.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank (1)->
    ereport.cpu.intel.nb.mem_ce@
    motherboard/memory-controller/dram-channel/dimm/rank;

/*
 * When the dimm has no "dimm-size" configuration property the default
 * engine parameters above apply and the page-fault count threshold is 512.
 */
prop fault.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank
    { !confprop_defined(motherboard/memory-controller/dram-channel/dimm,
    "dimm-size") } (1)->
    error.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm;
prop error.memory.intel.dimm_ce@motherboard/memory-controller/dram-channel/dimm
    { !confprop_defined(motherboard/memory-controller/dram-channel/dimm,
    "dimm-size") &&
    count(stat.ce_pgflt@
    motherboard/memory-controller/dram-channel/dimm) > 512 } (1)->
    ereport.cpu.intel.nb.mem_ce@
    motherboard/memory-controller/dram-channel/dimm/rank<>;

/*
 * DIMM_CE(dimm_size, n, t, fault_rate): for a dimm whose "dimm-size"
 * configuration property equals dimm_size, set the dimm_ce SERD engine's
 * N and T to n and t, and require more than fault_rate counted page
 * faults on the dimm.  The macro body ends with its own ';', so the
 * invocations below carry none.
 *
 * NOTE(review): setserdn() and setserdt() are joined with a single '&',
 * presumably so that both calls are always evaluated -- confirm before
 * changing it to '&&'.
 */
#define DIMM_CE(dimm_size, n, t, fault_rate) \
    prop fault.memory.intel.dimm_ce@ \
        motherboard/memory-controller/dram-channel/dimm/rank { \
        confprop(motherboard/memory-controller/dram-channel/dimm, \
        "dimm-size") == dimm_size && \
        setserdn(n) & setserdt(t) } (1)-> \
        error.memory.intel.dimm_ce@ \
        motherboard/memory-controller/dram-channel/dimm; \
    prop error.memory.intel.dimm_ce@ \
        motherboard/memory-controller/dram-channel/dimm { \
        confprop(motherboard/memory-controller/dram-channel/dimm, \
        "dimm-size") == dimm_size && \
        count(stat.ce_pgflt@ \
        motherboard/memory-controller/dram-channel/dimm) > fault_rate } \
        (1)-> \
        ereport.cpu.intel.nb.mem_ce@ \
        motherboard/memory-controller/dram-channel/dimm/rank<>;

DIMM_CE("8G", 8, 1week, 2000)
DIMM_CE("4G", 4, 1week, 1500)
DIMM_CE("2G", 4, 2week, 1000)
DIMM_CE("1G", 4, 4week, 500)
DIMM_CE("512M", 4, 8week, 250)
DIMM_CE("256M", 4, 16week, 125)

/* FBD alert, detected at a rank */
event ereport.cpu.intel.nb.fbd.alert@rank{within(12s)};
event fault.memory.intel.fbd.alert@rank, retire=0;

prop fault.memory.intel.fbd.alert@rank (1)->
    ereport.cpu.intel.nb.fbd.alert@rank;

prop fault.memory.intel.fbd.alert@rank (0)->
    EREPORT_BUS_ERROR;

/* FBD CRC error, detected at a rank */
event ereport.cpu.intel.nb.fbd.crc@rank{within(12s)};
event fault.memory.intel.fbd.crc@rank, retire=0;

prop fault.memory.intel.fbd.crc@rank (1)->
    ereport.cpu.intel.nb.fbd.crc@rank;

prop fault.memory.intel.fbd.crc@rank (0)-> EREPORT_BUS_ERROR;

/* FBD reset timeout, detected at the memory controller */
event ereport.cpu.intel.nb.fbd.reset_timeout@memory-controller {within(12s)};
event fault.memory.intel.fbd.reset_timeout@memory-controller, retire=0;

prop fault.memory.intel.fbd.reset_timeout@memory-controller (1)->
    ereport.cpu.intel.nb.fbd.reset_timeout@memory-controller;

prop fault.memory.intel.fbd.reset_timeout@memory-controller (0)->
    EREPORT_BUS_ERROR;

/* FBD channel error: SERDed per dram-channel, N=2 within 1month */
event ereport.cpu.intel.nb.fbd.ch@dram-channel {within(12s)};
engine serd.cpu.intel.nb.fbd.ch@dram-channel, N=2, T=1month;
event fault.memory.intel.fbd.ch@dram-channel, retire=0,
    engine=serd.cpu.intel.nb.fbd.ch@dram-channel;

prop fault.memory.intel.fbd.ch@dram-channel (1)->
    ereport.cpu.intel.nb.fbd.ch@dram-channel;

prop fault.memory.intel.fbd.ch@dram-channel (0)->
    EREPORT_BUS_ERROR;

/* FBD "otf" ereport: SERDed per dram-channel, N=2 within 1week */
event ereport.cpu.intel.nb.fbd.otf@dram-channel {within(12s)};
engine serd.cpu.intel.nb.fbd_otf@dram-channel, N=2, T=1week;
event fault.memory.intel.fbd.otf@dram-channel, retire=0, response=0,
    engine=serd.cpu.intel.nb.fbd_otf@dram-channel;

prop fault.memory.intel.fbd.otf@dram-channel (1)->
    ereport.cpu.intel.nb.fbd.otf@dram-channel;

/* Northbridge "otf" ereport, detected at the motherboard */
event ereport.cpu.intel.nb.otf@motherboard {within(12s)};
event fault.cpu.intel.nb.otf@motherboard, retire=0, response=0;

prop fault.cpu.intel.nb.otf@motherboard (1)->
    ereport.cpu.intel.nb.otf@motherboard;

/*
 * Unknown and spd northbridge ereports at the memory controller or
 * dram-channel are tied only to a discard upset.
 */
event ereport.cpu.intel.nb.unknown@memory-controller {within(12s)};
event ereport.cpu.intel.nb.unknown@memory-controller/dram-channel {within(12s)};
event ereport.cpu.intel.nb.spd@memory-controller/dram-channel {within(12s)};
event upset.discard@memory-controller;

prop upset.discard@memory-controller (0)->
    ereport.cpu.intel.nb.unknown@memory-controller,
    ereport.cpu.intel.nb.unknown@memory-controller/dram-channel,
    ereport.cpu.intel.nb.spd@memory-controller/dram-channel;

/*
 * mem_ds is reported at the memory controller; the fault is placed on the
 * rank whose number matches the "rank" payload member.
 */
event ereport.cpu.intel.nb.mem_ds@memory-controller{within(30s)};
event fault.memory.intel.fbd.mem_ds@memory-controller/dram-channel/dimm/rank,
    retire=0;

prop fault.memory.intel.fbd.mem_ds@
    memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") } (1)->
    ereport.cpu.intel.nb.mem_ds@memory-controller;

/* FSB error, detected at a chip */
event ereport.cpu.intel.nb.fsb@chip{within(12s)};
event fault.cpu.intel.nb.fsb@chip, retire=0;

prop fault.cpu.intel.nb.fsb@chip (1)->
    ereport.cpu.intel.nb.fsb@chip;

prop fault.cpu.intel.nb.fsb@chip (0)-> EREPORT_BUS_ERROR;

/*
 * Northbridge "ie" ereport: an "intel-error-list" payload value of "B6"
 * goes to the ie_ce upset; any other value is diagnosed as a fault.
 */
event ereport.cpu.intel.nb.ie@motherboard{within(12s)};
event fault.cpu.intel.nb.ie@motherboard, retire=0;
event upset.cpu.intel.nb.ie_ce@motherboard{within(12s)};

prop upset.cpu.intel.nb.ie_ce@motherboard
    { payloadprop("intel-error-list") == "B6" } (0)->
    ereport.cpu.intel.nb.ie@motherboard;

prop fault.cpu.intel.nb.ie@motherboard
    { payloadprop("intel-error-list") != "B6" } (1)->
    ereport.cpu.intel.nb.ie@motherboard;

prop fault.cpu.intel.nb.ie@motherboard (0)-> EREPORT_BUS_ERROR;

/* Northbridge DMA error, detected at the motherboard */
event ereport.cpu.intel.nb.dma@motherboard{within(12s)};
event fault.cpu.intel.nb.dma@motherboard, retire=0, response=0;

prop fault.cpu.intel.nb.dma@motherboard (1)->
    ereport.cpu.intel.nb.dma@motherboard;

prop fault.cpu.intel.nb.dma@motherboard (0)-> EREPORT_BUS_ERROR;

/* ESI and PEX ereports are both tied to the pex upset at the hostbridge */
event ereport.cpu.intel.nb.esi@motherboard{within(12s)};
event ereport.cpu.intel.nb.pex@hostbridge{within(12s)};
event upset.cpu.intel.nb.pex@hostbridge;

prop upset.cpu.intel.nb.pex@hostbridge (1)->
    ereport.cpu.intel.nb.esi@motherboard,
    ereport.cpu.intel.nb.pex@hostbridge;

prop upset.cpu.intel.nb.pex@hostbridge (0)-> EREPORT_BUS_ERROR;

/* Unknown northbridge ereport at a rank: discard */
event ereport.cpu.intel.nb.unknown@rank{within(12s)};
event upset.discard@rank;

prop upset.discard@rank (1)->
    ereport.cpu.intel.nb.unknown@rank;

prop upset.discard@rank (0)-> EREPORT_BUS_ERROR;

/*
 * CPU integrated memory controller
 */

/*
 * CONTAINS_RANK is true when the ereport's "resource" payload member
 * contains the ASRU of the rank under consideration, or of its dimm.
 */
#define CONTAINS_RANK (payloadprop_contains("resource", \
    asru(motherboard/chip/memory-controller/dram-channel/dimm/rank)) || \
    payloadprop_contains("resource", \
    asru(motherboard/chip/memory-controller/dram-channel/dimm)))

/* Per-dimm counter of correctable-error page faults, and its count() */
#define STAT_CPU_MEM_CE_PGFLTS \
    stat.ce_pgflt@motherboard/chip/memory-controller/dram-channel/dimm
#define CPU_MEM_CE_PGFLTS (count(STAT_CPU_MEM_CE_PGFLTS))

/*
 * Like SET_OFFSET, but the offset is taken from the payload member
 * "resource[0].hc-specific.offset".
 */
#define SET_RES_OFFSET \
    (!payloadprop_defined("resource[0].hc-specific.offset") || \
    setpayloadprop("asru-offset", \
    payloadprop("resource[0].hc-specific.offset")))

engine STAT_CPU_MEM_CE_PGFLTS;

event ereport.cpu.intel.quickpath.mem_ue@motherboard/chip/memory-controller
    {within(12s)};

event fault.memory.intel.page_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank,
    message=0, response=0;  /* do not message individual pageflts */

prop fault.memory.intel.page_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    { CONTAINS_RANK && (payloadprop_defined("physaddr") ||
    payloadprop_defined("resource[0].hc-specific.offset")) &&
    SET_ADDR && SET_RES_OFFSET } (1)->
    ereport.cpu.intel.quickpath.mem_ue@motherboard/chip/memory-controller;

event fault.memory.intel.dimm_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank;

prop fault.memory.intel.dimm_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    { CONTAINS_RANK } (1)->
    ereport.cpu.intel.quickpath.mem_ue@motherboard/chip/memory-controller;

prop fault.memory.intel.dimm_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank (0)->
    EREPORT_BUS_ERROR;

event ereport.cpu.intel.quickpath.mem_ce@
    motherboard/chip/memory-controller{within(12s)};

engine serd.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank,
    N=PAGE_CE_COUNT, T=PAGE_CE_TIME;

event fault.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank,
    message=0, response=0,
    count=STAT_CPU_MEM_CE_PGFLTS,
    engine=serd.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank;

prop fault.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    { CONTAINS_RANK && (payloadprop_defined("physaddr") ||
    payloadprop_defined("resource[0].hc-specific.offset")) &&
    SET_ADDR && SET_RES_OFFSET } (1)->
    ereport.cpu.intel.quickpath.mem_ce@motherboard/chip/memory-controller;

/*
 * NOTE(review): this dimm-level engine is declared with PAGE_CE_COUNT and
 * PAGE_CE_TIME, whereas the northbridge dimm_ce engine uses DIMM_CE_COUNT
 * and DIMM_CE_TIME.  Possibly a copy-and-paste slip; left unchanged
 * because it alters diagnosis thresholds -- confirm the intended values.
 */
engine serd.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm,
    N=PAGE_CE_COUNT, T=PAGE_CE_TIME;
event fault.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm,
    engine=serd.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm;
event error.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm;
prop fault.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm
    { !confprop_defined(motherboard/chip/memory-controller/dram-channel/dimm,
    "dimm-size") } (1)->
    error.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm;
prop error.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm
    { !confprop_defined(motherboard/chip/memory-controller/dram-channel/dimm,
    "dimm-size") &&
    count(STAT_CPU_MEM_CE_PGFLTS) > 512 } (1)->
    ereport.cpu.intel.quickpath.mem_ce@motherboard/chip/memory-controller;

/*
 * CPU_MEM_DIMM_CE(dimm_size, n, t, fault_rate): the integrated memory
 * controller counterpart of DIMM_CE.  For a dimm whose "dimm-size"
 * configuration property equals dimm_size, set the dimm_ce SERD engine's
 * N and T to n and t, and require more than fault_rate counted page
 * faults on the dimm.  The macro body ends with its own ';', so the
 * invocations below carry none.
 */
#define CPU_MEM_DIMM_CE(dimm_size, n, t, fault_rate) \
    prop fault.memory.intel.dimm_ce@ \
        motherboard/chip/memory-controller/dram-channel/dimm { \
        confprop(motherboard/chip/memory-controller/dram-channel/dimm, \
        "dimm-size") == dimm_size && \
        setserdn(n) & setserdt(t) } (1)-> \
        error.memory.intel.dimm_ce@ \
        motherboard/chip/memory-controller/dram-channel/dimm; \
    prop error.memory.intel.dimm_ce@ \
        motherboard/chip/memory-controller/dram-channel/dimm { \
        confprop(motherboard/chip/memory-controller/dram-channel/dimm, \
        "dimm-size") == dimm_size && \
        count(STAT_CPU_MEM_CE_PGFLTS) > fault_rate } (1)-> \
        ereport.cpu.intel.quickpath.mem_ce@ \
        motherboard/chip/memory-controller;

CPU_MEM_DIMM_CE("16G", 16, 1week, 2000)
CPU_MEM_DIMM_CE("8G", 8, 1week, 2000)
CPU_MEM_DIMM_CE("4G", 4, 1week, 1500)
CPU_MEM_DIMM_CE("2G", 4, 2week, 1000)
CPU_MEM_DIMM_CE("1G", 4, 4week, 500)
CPU_MEM_DIMM_CE("512M", 4, 8week, 250)

/* Unknown integrated-memory-controller ereports: discard */
event ereport.cpu.intel.quickpath.mem_unknown@motherboard/chip/memory-controller
    {within(12s)};
event ereport.cpu.intel.quickpath.mem_unknown@motherboard/chip/memory-controller/dram-channel
    {within(12s)};
event ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel/dimm/rank{within(12s)};
event upset.discard@motherboard/chip/memory-controller;
event upset.discard@motherboard/chip/memory-controller/dram-channel/dimm/rank;

prop upset.discard@motherboard/chip/memory-controller (0)->
    ereport.cpu.intel.quickpath.mem_unknown@motherboard/chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel;

prop upset.discard@
    motherboard/chip/memory-controller/dram-channel/dimm/rank (1)->
    ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel/dimm/rank;

/* Memory parity error: fault on the memory controller */
event ereport.cpu.intel.quickpath.mem_parity@motherboard/chip/memory-controller
    {within(1s)};
event fault.cpu.intel.quickpath.mem_parity@motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_parity@motherboard/chip/memory-controller
    (1)->
    ereport.cpu.intel.quickpath.mem_parity@motherboard/chip/memory-controller;

/*
 * Address parity error: a fault on the memory controller, and a fault on
 * the dimm whose ASRU appears in the "resource" payload member.
 */
event ereport.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller {within(1s)};
event fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller/dram-channel/dimm;
event fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller (1)->
    ereport.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller/dram-channel/dimm
    { payloadprop_contains("resource",
    asru(motherboard/chip/memory-controller/dram-channel/dimm)) } (1)->
    ereport.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller;

/* Bad address: fault on the memory controller */
event ereport.cpu.intel.quickpath.mem_bad_addr@
    motherboard/chip/memory-controller {within(1s)};
event fault.cpu.intel.quickpath.mem_bad_addr@motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_bad_addr@motherboard/chip/memory-controller
    (1)->
    ereport.cpu.intel.quickpath.mem_bad_addr@
    motherboard/chip/memory-controller;

/* mem_spare ereport: fault on the dimm */
event ereport.cpu.intel.quickpath.mem_spare@motherboard/chip/memory-controller
    {within(1s)};
event fault.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller/dram-channel/dimm;

prop fault.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller/dram-channel/dimm (1)->
    ereport.cpu.intel.quickpath.mem_spare@motherboard/chip/memory-controller;

/* Bad id: fault on the memory controller */
event ereport.cpu.intel.quickpath.mem_bad_id@motherboard/chip/memory-controller
    {within(1s)};
event fault.cpu.intel.quickpath.mem_bad_id@motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_bad_id@motherboard/chip/memory-controller
    (1)->
    ereport.cpu.intel.quickpath.mem_bad_id@motherboard/chip/memory-controller;

/*
 * mem_redundant ereport: SERDed (N=2 within 72h) on the dimm whose ASRU
 * appears in the "resource" payload member.
 */
event ereport.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller {within(1s)};
engine serd.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm,
    N=2, T=72h;
event fault.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm,
    engine=serd.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm;

prop fault.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm
    { payloadprop_contains("resource",
    asru(motherboard/chip/memory-controller/dram-channel/dimm)) } (1)->
    ereport.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller;

/*
 * STATUS_UC is true when the ereport payload marks the error as
 * uncorrected.  The QuickPath interconnect rules below use it, but no
 * definition is visible in this file, so one is provided here; it tests
 * the same "error_uncorrected" payload member as the simple-error rule
 * for fault.cpu.intel.internal.  The guard leaves any definition
 * supplied from elsewhere in effect.
 */
#ifndef STATUS_UC
#define STATUS_UC (payloadprop("error_uncorrected") == 1)
#endif

event ereport.cpu.intel.quickpath.interconnect@motherboard/chip
    {within(1s)};
event upset.cpu.intel.quickpath.interconnect@motherboard/chip;
/* Diagnose corrected events to upsets */
prop upset.cpu.intel.quickpath.interconnect@motherboard/chip
    { !STATUS_UC } (1)->
    ereport.cpu.intel.quickpath.interconnect@motherboard/chip;


engine serd.cpu.intel.quickpath.interconnect@motherboard/chip,
    N=3, T=72h;
event fault.cpu.intel.quickpath.interconnect@motherboard/chip,
    engine=serd.cpu.intel.quickpath.interconnect@motherboard/chip;

/* Diagnose uncorrected events to faults */
prop fault.cpu.intel.quickpath.interconnect@motherboard/chip
    { STATUS_UC } (0)->
    ereport.cpu.intel.quickpath.interconnect@motherboard/chip;