1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2019 Joyent, Inc.
14 */
15
16 /*
17 * Memory decoding logic.
18 *
19 * This file is part of the 'imc' driver on x86. It supports taking a physical
20 * address and determining what the corresponding DIMM is. This is shared
21 * between the kernel and userland for easier testing.
22 *
23 * For more information about the different parts of the decoding process,
24 * please see the file 'uts/i86pc/io/imc/imc.c'.
25 */
26
27 #include <sys/sysmacros.h>
28
29 #ifndef _KERNEL
30 #include <stdint.h>
31 #include <strings.h>
32 #define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU))
33 #endif /* !_KERNEL */
34
35 #include "imc.h"
36
37 /*
38 * Address ranges for decoding system addresses. There are three ranges that
39 * exist on x86, traditional DOS memory (hi 640 KiB), low memory, and high
40 * memory. Low memory always starts at 1 MiB and high memory always starts at 4
41 * GiB. The upper bounds of these ranges is based on registers on the system.
42 */
43 #define IMC_DECODE_CONV_BASE 0UL
44 #define IMC_DECODE_CONV_MAX 0x00009ffffULL /* 640 KiB - 1 */
45 #define IMC_DECODE_LOW_BASE 0x000100000ULL /* 1 M */
46 #define IMC_DECODE_HIGH_BASE 0x100000000ULL /* 4 GiB */
47
typedef struct imc_legacy_range {
	uint64_t ilr_base;	/* Starting physical address of the range */
	size_t ilr_len;		/* Length of the range in bytes */
	const char *ilr_desc;	/* Human-readable name, reported on decode failure */
} imc_legacy_range_t;

/*
 * These represent regions of memory that are reserved for use and will not be
 * decoded by DRAM. An address matching any entry fails decoding with
 * IMC_DECODE_F_LEGACY_RANGE and the entry's index as failure data. Note that
 * some entries overlap (e.g. "Unknown" spans the "Firmware" range); the first
 * match in array order wins.
 */
static imc_legacy_range_t imc_legacy_ranges[] = {
	{ 0x00000A0000ULL,	128 * 1024,		"VGA" },
	{ 0x00000C0000ULL,	256 * 1024,		"PAM" },
	{ 0x0000F00000ULL,	1024 * 1024,		"Reserved" },
	{ 0x00FE000000ULL,	32 * 1024 * 1024,	"Unknown" },
	{ 0x00FF000000ULL,	16 * 1024 * 1024,	"Firmware" },
	{ 0x00FED20000ULL,	384 * 1024,		"TXT" },
	{ 0x00FED00000ULL,	1024 * 1024,		"PCH" },
	{ 0x00FEC00000ULL,	1024 * 1024,		"IOAPIC" },
	{ 0x00FEB80000ULL,	512 * 1024,		"Reserved" },
	{ 0x00FEB00000ULL,	64 * 1024,		"Reserved" }
};
70
71 /*
72 * Determine whether or not this address is in one of the reserved regions or if
73 * it falls outside of the explicit DRAM ranges.
74 */
75 static boolean_t
imc_decode_addr_resvd(const imc_t * imc,imc_decode_state_t * dec)76 imc_decode_addr_resvd(const imc_t *imc, imc_decode_state_t *dec)
77 {
78 uint_t i;
79 const imc_sad_t *sad;
80
81 for (i = 0; i < ARRAY_SIZE(imc_legacy_ranges); i++) {
82 uint64_t end = imc_legacy_ranges[i].ilr_base +
83 imc_legacy_ranges[i].ilr_len;
84
85 if (dec->ids_pa >= imc_legacy_ranges[i].ilr_base &&
86 dec->ids_pa < end) {
87 dec->ids_fail = IMC_DECODE_F_LEGACY_RANGE;
88 dec->ids_fail_data = i;
89 return (B_TRUE);
90 }
91 }
92
93 /*
94 * For checking and determining whether or not we fit in DRAM, we need
95 * to check against the top of low memory and the top of high memory.
96 * While we technically have this information on a per-socket basis, we
97 * have to rely on the fact that both processors have the same
98 * information. A requirement which if not true, would lead to chaos
99 * depending on what socket we're running on.
100 */
101 sad = &imc->imc_sockets[0].isock_sad;
102 if (sad->isad_valid != IMC_SAD_V_VALID) {
103 dec->ids_fail = IMC_DECODE_F_BAD_SAD;
104 return (B_TRUE);
105 }
106
107 /*
108 * An address may fall into three ranges. It may fall into conventional
109 * memory. It may fall into low memory. It may fall into high memory.
110 * The conventional memory range is inclusive at the top. The others
111 * have been translated such that they are uniformly exclusive at the
112 * top. Because the bottom of conventional memory is at zero, the
113 * compiler will be angry if we compare against IMC_DECODE_CONV_BASE as
114 * it is always true.
115 */
116 if (dec->ids_pa <= IMC_DECODE_CONV_MAX) {
117 return (B_FALSE);
118 }
119
120 if (dec->ids_pa >= IMC_DECODE_LOW_BASE &&
121 dec->ids_pa < sad->isad_tolm) {
122 return (B_FALSE);
123 }
124
125 if (dec->ids_pa >= IMC_DECODE_HIGH_BASE &&
126 dec->ids_pa < sad->isad_tohm) {
127 return (B_FALSE);
128 }
129
130 /*
131 * Memory fell outside of the valid range. It's not for us.
132 */
133 dec->ids_fail = IMC_DECODE_F_OUTSIDE_DRAM;
134 return (B_TRUE);
135 }
136
137 static uint_t
imc_decode_sad_interleave(const imc_sad_rule_t * rule,uint64_t pa)138 imc_decode_sad_interleave(const imc_sad_rule_t *rule, uint64_t pa)
139 {
140 uint_t itgt = 0;
141
142 switch (rule->isr_imode) {
143 case IMC_SAD_IMODE_8t6:
144 if (rule->isr_a7mode) {
145 itgt = BITX(pa, 9, 9);
146 itgt |= (BITX(pa, 8, 7) << 1);
147 } else {
148 itgt = BITX(pa, 8, 6);
149 }
150 break;
151 case IMC_SAD_IMODE_8t6XOR:
152 if (rule->isr_a7mode) {
153 itgt = BITX(pa, 9, 9);
154 itgt |= (BITX(pa, 8, 7) << 1);
155 } else {
156 itgt = BITX(pa, 8, 6);
157 }
158 itgt ^= BITX(pa, 18, 16);
159 break;
160 case IMC_SAD_IMODE_10t8:
161 itgt = BITX(pa, 10, 8);
162 break;
163 case IMC_SAD_IMODE_14t12:
164 itgt = BITX(pa, 14, 12);
165 break;
166 case IMC_SAD_IMODE_32t30:
167 itgt = BITX(pa, 32, 30);
168 break;
169 }
170
171 return (itgt);
172 }
173
174 /*
175 * Use the system address decoder to try and find a valid SAD entry for this
176 * address. We always use socket zero's SAD as the SAD rules should be the same
177 * between the different sockets.
178 */
179 static boolean_t
imc_decode_sad(const imc_t * imc,imc_decode_state_t * dec)180 imc_decode_sad(const imc_t *imc, imc_decode_state_t *dec)
181 {
182 uint_t i, ileaveidx;
183 uint8_t ileavetgt;
184 uint32_t nodeid, tadid, channelid;
185 uint64_t base;
186 const imc_socket_t *socket = &imc->imc_sockets[0];
187 const imc_sad_t *sad = &socket->isock_sad;
188 const imc_sad_rule_t *rule;
189 boolean_t loop = B_FALSE;
190
191 /*
192 * Note, all SAD rules have been adjusted so that they are uniformly
193 * exclusive.
194 */
195 start:
196 for (rule = NULL, i = 0, base = 0; i < sad->isad_nrules; i++) {
197 rule = &sad->isad_rules[i];
198
199 if (rule->isr_enable && dec->ids_pa >= base &&
200 dec->ids_pa < rule->isr_limit) {
201 break;
202 }
203
204 base = rule->isr_limit;
205 }
206
207 if (rule == NULL || i == sad->isad_nrules) {
208 dec->ids_fail = IMC_DECODE_F_NO_SAD_RULE;
209 return (B_FALSE);
210 }
211
212 /*
213 * Store the SAD rule in the decode information for debugging's sake.
214 */
215 dec->ids_sad = sad;
216 dec->ids_sad_rule = rule;
217
218 /*
219 * We have found a SAD rule. We now need to transform that into the
220 * corresponding target based on its mode, etc. The way we do this
221 * varies based on the generation.
222 *
223 * The first thing we need to do is to figure out the target in the
224 * interleave list.
225 */
226 ileaveidx = imc_decode_sad_interleave(rule, dec->ids_pa);
227 if (ileaveidx >= rule->isr_ntargets) {
228 dec->ids_fail = IMC_DECODE_F_BAD_SAD_INTERLEAVE;
229 dec->ids_fail_data = ileaveidx;
230 return (B_FALSE);
231 }
232 ileavetgt = rule->isr_targets[ileaveidx];
233 if (imc->imc_gen >= IMC_GEN_SKYLAKE &&
234 IMC_SAD_ILEAVE_SKX_LOCAL(ileavetgt) == 0) {
235 /*
236 * If we're in this case, the interleave rule said we had a
237 * remote target. That means we need to find the correct SAD
238 * based on the Node ID and then do all of this over again.
239 */
240 nodeid = IMC_SAD_ILEAVE_SKX_TARGET(ileavetgt);
241
242 if (loop) {
243 dec->ids_fail = IMC_DECODE_F_SAD_SEARCH_LOOP;
244 return (B_FALSE);
245 }
246
247 for (i = 0; i < imc->imc_nsockets; i++) {
248 if (imc->imc_sockets[i].isock_valid ==
249 IMC_SOCKET_V_VALID &&
250 imc->imc_sockets[i].isock_nodeid == nodeid) {
251 socket = &imc->imc_sockets[i];
252 sad = &imc->imc_sockets[i].isock_sad;
253 loop = B_TRUE;
254 goto start;
255 }
256 }
257
258 dec->ids_fail = IMC_DECODE_F_BAD_REMOTE_MC_ROUTE;
259 dec->ids_fail_data = nodeid;
260 return (B_FALSE);
261 }
262
263 /*
264 * On some platforms we need to derive the target channel based on the
265 * physical address and additional rules in the SAD. If we do, do that
266 * here. The idea is that this may overrule the memory channel route
267 * table target that was determined from the SAD rule.
268 */
269 if (rule->isr_need_mod3) {
270 uint64_t addr;
271 uint8_t channel;
272
273 switch (rule->isr_mod_mode) {
274 case IMC_SAD_MOD_MODE_45t6:
275 addr = dec->ids_pa >> 6;
276 break;
277 case IMC_SAD_MOD_MODE_45t8:
278 addr = dec->ids_pa >> 8;
279 break;
280 case IMC_SAD_MOD_MODE_45t12:
281 addr = dec->ids_pa >> 12;
282 break;
283 default:
284 dec->ids_fail = IMC_DECODE_F_SAD_BAD_MOD;
285 return (B_FALSE);
286 }
287
288 switch (rule->isr_mod_type) {
289 case IMC_SAD_MOD_TYPE_MOD3:
290 channel = (addr % 3) << 1;
291 channel |= ileavetgt & 1;
292 break;
293 case IMC_SAD_MOD_TYPE_MOD2_01:
294 channel = (addr % 2) << 1;
295 channel |= ileavetgt & 1;
296 break;
297 case IMC_SAD_MOD_TYPE_MOD2_12:
298 channel = (addr % 2) << 2;
299 channel |= (~addr % 2) << 1;
300 channel |= ileavetgt & 1;
301 break;
302 case IMC_SAD_MOD_TYPE_MOD2_02:
303 channel = (addr % 2) << 2;
304 channel |= ileavetgt & 1;
305 break;
306 default:
307 dec->ids_fail = IMC_DECODE_F_SAD_BAD_MOD;
308 return (B_FALSE);
309 }
310
311 ileavetgt = channel;
312 }
313
314 switch (imc->imc_gen) {
315 case IMC_GEN_SANDY:
316 /*
317 * Sandy Bridge systems only have a single home agent, so the
318 * interleave target is always the node id.
319 */
320 nodeid = ileavetgt;
321 tadid = 0;
322 channelid = UINT32_MAX;
323 break;
324 case IMC_GEN_IVY:
325 case IMC_GEN_HASWELL:
326 case IMC_GEN_BROADWELL:
327 /*
328 * On these generations, the interleave NodeID in the SAD
329 * encodes both the nodeid and the home agent ID that we care
330 * about.
331 */
332 nodeid = IMC_NODEID_IVY_BRD_UPPER(ileavetgt) |
333 IMC_NODEID_IVY_BRD_LOWER(ileavetgt);
334 tadid = IMC_NODEID_IVY_BRD_HA(ileavetgt);
335 channelid = UINT32_MAX;
336 break;
337 case IMC_GEN_SKYLAKE:
338 /*
339 * On Skylake generation systems we take the interleave target
340 * and use that to look up both the memory controller and the
341 * physical channel in the route table. The nodeid is already
342 * known because its SAD rules redirect us.
343 */
344 nodeid = socket->isock_nodeid;
345 if (ileavetgt > IMC_SAD_ILEAVE_SKX_MAX) {
346 dec->ids_fail = IMC_DECODE_F_BAD_SAD_INTERLEAVE;
347 dec->ids_fail_data = ileavetgt;
348 return (B_FALSE);
349 }
350 ileavetgt = IMC_SAD_ILEAVE_SKX_TARGET(ileavetgt);
351 if (ileavetgt > sad->isad_mcroute.ismc_nroutes) {
352 dec->ids_fail = IMC_DECODE_F_BAD_SAD_INTERLEAVE;
353 dec->ids_fail_data = ileavetgt;
354 return (B_FALSE);
355 }
356 tadid = sad->isad_mcroute.ismc_mcroutes[ileavetgt].ismce_imc;
357 channelid =
358 sad->isad_mcroute.ismc_mcroutes[ileavetgt].ismce_pchannel;
359 break;
360 default:
361 nodeid = tadid = channelid = UINT32_MAX;
362 break;
363 }
364
365 /*
366 * Map to the correct socket based on the nodeid. Make sure that we have
367 * a valid TAD.
368 */
369 dec->ids_socket = NULL;
370 for (i = 0; i < imc->imc_nsockets; i++) {
371 if (imc->imc_sockets[i].isock_nodeid == nodeid) {
372 dec->ids_socket = &imc->imc_sockets[i];
373 break;
374 }
375 }
376 if (dec->ids_socket == NULL) {
377 dec->ids_fail = IMC_DECODE_F_SAD_BAD_SOCKET;
378 dec->ids_fail_data = nodeid;
379 return (B_FALSE);
380 }
381
382 if (tadid >= dec->ids_socket->isock_ntad) {
383 dec->ids_fail = IMC_DECODE_F_SAD_BAD_TAD;
384 dec->ids_fail_data = tadid;
385 return (B_FALSE);
386 }
387
388 dec->ids_nodeid = nodeid;
389 dec->ids_tadid = tadid;
390 dec->ids_channelid = channelid;
391 dec->ids_tad = &dec->ids_socket->isock_tad[tadid];
392 dec->ids_mc = &dec->ids_socket->isock_imcs[tadid];
393
394 return (B_TRUE);
395 }
396
397 /*
398 * For Sandy Bridge through Broadwell we need to decode the memory channel that
399 * we're targeting. This is determined based on the number of ways that the
400 * socket and channel are supposed to be interleaved. The TAD has a target
401 * channel list sitting with the TAD rule. To figure out the appropriate index,
402 * the algorithm is roughly:
403 *
404 * idx = [(dec->ids_pa >> 6) / socket-ways] % channel-ways
405 *
406 * The shift by six, comes from taking the number of bits that are in theory in
407 * the cache line size. Of course, if things were this simple, that'd be great.
408 * The first complication is a7mode / MCChanShiftUpEnable. When this is enabled,
409 * more cache lines are used for this. The next complication comes when the
410 * feature MCChanHashEn is enabled. This means that we have to hash the
411 * resulting address before we do the modulus based on the number of channel
412 * ways.
413 *
414 * The last, and most complicated problem is when the number of channel ways is
415 * set to three. When this is the case, the base address of the range may not
416 * actually start at index zero. The nominal solution is to use the offset
417 * that's programmed on a per-channel basis to offset the system address.
418 * However, to get that information we would have to know what channel we're on,
419 * which is what we're trying to figure out. Regretfully, proclaim that we can't
420 * in this case.
421 */
422 static boolean_t
imc_decode_tad_channel(const imc_t * imc,imc_decode_state_t * dec)423 imc_decode_tad_channel(const imc_t *imc, imc_decode_state_t *dec)
424 {
425 uint64_t index;
426 const imc_tad_rule_t *rule = dec->ids_tad_rule;
427
428 index = dec->ids_pa >> 6;
429 if ((dec->ids_tad->itad_flags & IMC_TAD_FLAG_CHANSHIFT) != 0) {
430 index = index >> 1;
431 }
432
433 /*
434 * When performing a socket way equals three comparison, this would not
435 * work.
436 */
437 index = index / rule->itr_sock_way;
438
439 if ((dec->ids_tad->itad_flags & IMC_TAD_FLAG_CHANHASH) != 0) {
440 uint_t i;
441 for (i = 12; i < 28; i += 2) {
442 uint64_t shift = (dec->ids_pa >> i) & 0x3;
443 index ^= shift;
444 }
445 }
446
447 index %= rule->itr_chan_way;
448 if (index >= rule->itr_ntargets) {
449 dec->ids_fail = IMC_DECODE_F_TAD_BAD_TARGET_INDEX;
450 dec->ids_fail_data = index;
451 return (B_FALSE);
452 }
453
454 dec->ids_channelid = rule->itr_targets[index];
455 return (B_TRUE);
456 }
457
458 static uint_t
imc_tad_gran_to_shift(const imc_tad_t * tad,imc_tad_gran_t gran)459 imc_tad_gran_to_shift(const imc_tad_t *tad, imc_tad_gran_t gran)
460 {
461 uint_t shift = 0;
462
463 switch (gran) {
464 case IMC_TAD_GRAN_64B:
465 shift = 6;
466 if ((tad->itad_flags & IMC_TAD_FLAG_CHANSHIFT) != 0) {
467 shift++;
468 }
469 break;
470 case IMC_TAD_GRAN_256B:
471 shift = 8;
472 break;
473 case IMC_TAD_GRAN_4KB:
474 shift = 12;
475 break;
476 case IMC_TAD_GRAN_1GB:
477 shift = 30;
478 break;
479 }
480
481 return (shift);
482 }
483
/*
 * Walk the target address decoder (TAD) for the memory controller that the
 * SAD pointed us at: find the matching TAD rule, determine the memory
 * channel, and compute the channel-relative address. Returns B_FALSE with
 * dec->ids_fail set on failure.
 */
static boolean_t
imc_decode_tad(const imc_t *imc, imc_decode_state_t *dec)
{
	uint_t i, tadruleno;
	uint_t sockshift, chanshift, sockmask, chanmask;
	uint64_t off, chanaddr;
	const imc_tad_t *tad = dec->ids_tad;
	const imc_mc_t *mc = dec->ids_mc;
	const imc_tad_rule_t *rule = NULL;
	const imc_channel_t *chan;

	/*
	 * The first step in all of this is to determine which TAD rule applies
	 * for this address. Unlike SAD rules, each TAD rule carries its own
	 * base, so no running base needs to be tracked here.
	 */
	for (i = 0; i < tad->itad_nrules; i++) {
		rule = &tad->itad_rules[i];

		if (dec->ids_pa >= rule->itr_base &&
		    dec->ids_pa < rule->itr_limit) {
			break;
		}
	}

	/* rule == NULL covers a TAD with zero rules. */
	if (rule == NULL || i == tad->itad_nrules) {
		dec->ids_fail = IMC_DECODE_F_NO_TAD_RULE;
		return (B_FALSE);
	}
	tadruleno = i;
	dec->ids_tad_rule = rule;

	/*
	 * Check if our TAD rule requires 3-way interleaving on the channel. We
	 * basically can't do that right now. For more information, see the
	 * comment above imc_decode_tad_channel().
	 */
	if (rule->itr_chan_way == 3) {
		dec->ids_fail = IMC_DECODE_F_TAD_3_ILEAVE;
		return (B_FALSE);
	}

	/*
	 * On some platforms, we need to now calculate the channel index from
	 * this. The way that we calculate this is nominally straightforward,
	 * but complicated by a number of different issues.
	 */
	switch (imc->imc_gen) {
	case IMC_GEN_SANDY:
	case IMC_GEN_IVY:
	case IMC_GEN_HASWELL:
	case IMC_GEN_BROADWELL:
		if (!imc_decode_tad_channel(imc, dec)) {
			return (B_FALSE);
		}
		break;
	default:
		/*
		 * On Skylake and newer platforms we should have already decoded
		 * the target channel based on using the memory controller route
		 * table above.
		 */
		break;
	}

	/*
	 * We initialize ids_channelid to UINT32_MAX, so this should make sure
	 * that we catch an incorrect channel as well.
	 */
	if (dec->ids_channelid >= mc->icn_nchannels) {
		dec->ids_fail = IMC_DECODE_F_BAD_CHANNEL_ID;
		dec->ids_fail_data = dec->ids_channelid;
		return (B_FALSE);
	}
	chan = &mc->icn_channels[dec->ids_channelid];
	dec->ids_chan = chan;

	/* Each channel carries a per-TAD-rule offset; make sure one exists. */
	if (tadruleno >= chan->ich_ntad_offsets) {
		dec->ids_fail = IMC_DECODE_F_BAD_CHANNEL_TAD_OFFSET;
		dec->ids_fail_data = tadruleno;
		return (B_FALSE);
	}

	/*
	 * Now we can go ahead and calculate the channel address, which is
	 * roughly equal to:
	 *
	 *	chan_addr = (sys_addr - off) / (chan way * sock way).
	 *
	 * The catch is that we want to preserve the low bits where possible.
	 * The number of bits is based on the interleaving granularities, the
	 * way that's calculated is based on information in the TAD rule.
	 * However, if a7mode is enabled on Ivy Bridge through Broadwell, then
	 * we need to add one to that. So we will save the smallest number of
	 * bits that are left after interleaving.
	 *
	 * Because the interleaving occurs at different granularities, we need
	 * to break this into two discrete steps, one where we apply the socket
	 * interleaving and one where we apply the channel interleaving,
	 * shifting and dividing at each step.
	 */
	off = chan->ich_tad_offsets[tadruleno];
	if (off > dec->ids_pa) {
		dec->ids_fail = IMC_DECODE_F_CHANOFF_UNDERFLOW;
		return (B_FALSE);
	}
	chanshift = imc_tad_gran_to_shift(tad, rule->itr_chan_gran);
	sockshift = imc_tad_gran_to_shift(tad, rule->itr_sock_gran);
	chanmask = (1 << chanshift) - 1;
	sockmask = (1 << sockshift) - 1;

	/*
	 * For each interleave stage: shift out the preserved low bits, divide
	 * by the number of ways, shift back, and re-insert the preserved low
	 * bits from the original system address.
	 */
	chanaddr = dec->ids_pa - off;
	chanaddr >>= sockshift;
	chanaddr /= rule->itr_sock_way;
	chanaddr <<= sockshift;
	chanaddr |= dec->ids_pa & sockmask;
	chanaddr >>= chanshift;
	chanaddr /= rule->itr_chan_way;
	chanaddr <<= chanshift;
	chanaddr |= dec->ids_pa & chanmask;

	dec->ids_chanaddr = chanaddr;

	return (B_TRUE);
}
608
/*
 * Apply the rank interleave rules (RIR) for the channel that the TAD pointed
 * us at: find the matching rank interleave rule, map its target to a DIMM and
 * a rank on that DIMM, and compute the rank-relative address. Returns B_FALSE
 * with dec->ids_fail set on failure.
 */
static boolean_t
imc_decode_rir(const imc_t *imc, imc_decode_state_t *dec)
{
	const imc_mc_t *mc = dec->ids_mc;
	const imc_channel_t *chan = dec->ids_chan;
	const imc_rank_ileave_t *rir = NULL;
	const imc_rank_ileave_entry_t *rirtarg;
	const imc_dimm_t *dimm;
	uint32_t shift, index;
	uint_t i, dimmid, rankid;
	uint64_t mask, base, rankaddr;

	/*
	 * The number of low bits preserved across rank interleaving depends
	 * on the controller's page mode.
	 */
	if (mc->icn_closed) {
		shift = IMC_PAGE_BITS_CLOSED;
	} else {
		shift = IMC_PAGE_BITS_OPEN;
	}
	mask = (1UL << shift) - 1;

	/*
	 * Like SAD rules, RIR rules only carry limits, so track the running
	 * base as we walk the list.
	 */
	for (i = 0, base = 0; i < chan->ich_nrankileaves; i++) {
		rir = &chan->ich_rankileaves[i];
		if (rir->irle_enabled && dec->ids_chanaddr >= base &&
		    dec->ids_chanaddr < rir->irle_limit) {
			break;
		}

		base = rir->irle_limit;
	}

	/* rir == NULL covers a channel with zero rank interleave rules. */
	if (rir == NULL || i == chan->ich_nrankileaves) {
		dec->ids_fail = IMC_DECODE_F_NO_RIR_RULE;
		return (B_FALSE);
	}
	dec->ids_rir = rir;

	/*
	 * Determine the index of the rule that we care about. This is done by
	 * shifting the address based on the open and closed page bits and then
	 * just modding it by the number of ways in question.
	 */
	index = (dec->ids_chanaddr >> shift) % rir->irle_nways;
	if (index >= rir->irle_nentries) {
		dec->ids_fail = IMC_DECODE_F_BAD_RIR_ILEAVE_TARGET;
		dec->ids_fail_data = index;
		return (B_FALSE);
	}
	rirtarg = &rir->irle_entries[index];

	/*
	 * The rank interleaving register has information about a physical rank
	 * target. This is within the notion of the physical chip selects that
	 * exist. While the memory controller only has eight actual chip
	 * selects, the physical values that are programmed depend a bit on the
	 * underlying hardware. Effectively, in this ID space, each DIMM has
	 * four ranks associated with it. Even when we only have two ranks with
	 * each physical channel, they'll be programmed so we can simply do the
	 * following match:
	 *
	 * DIMM = rank id / 4
	 * RANK = rank id % 4
	 */
	dec->ids_physrankid = rirtarg->irle_target;
	dimmid = dec->ids_physrankid / 4;
	rankid = dec->ids_physrankid % 4;

	if (dimmid >= chan->ich_ndimms) {
		dec->ids_fail = IMC_DECODE_F_BAD_DIMM_INDEX;
		dec->ids_fail_data = dimmid;
		return (B_FALSE);
	}

	dimm = &chan->ich_dimms[dimmid];
	if (!dimm->idimm_present) {
		dec->ids_fail = IMC_DECODE_F_DIMM_NOT_PRESENT;
		return (B_FALSE);
	}
	dec->ids_dimmid = dimmid;
	dec->ids_dimm = dimm;

	if (rankid >= dimm->idimm_nranks) {
		dec->ids_fail = IMC_DECODE_F_BAD_DIMM_RANK;
		dec->ids_fail_data = rankid;
		return (B_FALSE);
	}
	dec->ids_rankid = rankid;

	/*
	 * Calculate the rank address. We need to divide the address by the
	 * number of rank ways and then or in the lower bits, using the same
	 * shift/divide/shift/or pattern as the channel address computation.
	 */
	rankaddr = dec->ids_chanaddr;
	rankaddr >>= shift;
	rankaddr /= rir->irle_nways;
	rankaddr <<= shift;
	rankaddr |= dec->ids_chanaddr & mask;

	/* Finally, apply the per-target offset, guarding against underflow. */
	if (rirtarg->irle_offset > rankaddr) {
		dec->ids_fail = IMC_DECODE_F_RANKOFF_UNDERFLOW;
		return (B_FALSE);
	}
	rankaddr -= rirtarg->irle_offset;
	dec->ids_rankaddr = rankaddr;

	return (B_TRUE);
}
714
715 boolean_t
imc_decode_pa(const imc_t * imc,uint64_t pa,imc_decode_state_t * dec)716 imc_decode_pa(const imc_t *imc, uint64_t pa, imc_decode_state_t *dec)
717 {
718 bzero(dec, sizeof (*dec));
719 dec->ids_pa = pa;
720 dec->ids_nodeid = dec->ids_tadid = dec->ids_channelid = UINT32_MAX;
721
722 /*
723 * We need to rely on socket zero's information. Make sure that it both
724 * exists and is considered valid.
725 */
726 if (imc->imc_nsockets < 1 ||
727 imc->imc_sockets[0].isock_valid != IMC_SOCKET_V_VALID) {
728 dec->ids_fail = IMC_DECODE_F_BAD_SOCKET;
729 dec->ids_fail_data = 0;
730 return (B_FALSE);
731 }
732
733 /*
734 * First, we need to make sure that the PA we've been given actually is
735 * meant to target a DRAM address. This address may fall to MMIO, MMCFG,
736 * be an address that's outside of DRAM, or belong to a legacy address
737 * range that is interposed.
738 */
739 if (imc_decode_addr_resvd(imc, dec)) {
740 return (B_FALSE);
741 }
742
743 /*
744 * Now that we have this data, we want to go through and look at the
745 * SAD. The SAD will point us to a specific socket and an IMC / home
746 * agent on that socket which will tell us which TAD we need to use.
747 */
748 if (!imc_decode_sad(imc, dec)) {
749 return (B_FALSE);
750 }
751
752 /*
753 * The decoded SAD information has pointed us a TAD. We need to use this
754 * to point us to the corresponding memory channel and the corresponding
755 * address on the channel.
756 */
757 if (!imc_decode_tad(imc, dec)) {
758 return (B_FALSE);
759 }
760
761 /*
762 * Use the rank interleaving data to determine which DIMM this is, the
763 * relevant rank, and the rank address.
764 */
765 if (!imc_decode_rir(imc, dec)) {
766 return (B_FALSE);
767 }
768
769 return (B_TRUE);
770 }
771