1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2022 Oxide Computer Co.
25 */
26
27 #include <sys/conf.h>
28 #include <sys/ddi.h>
29 #include <sys/ddifm.h>
30 #include <sys/sunddi.h>
31 #include <sys/sunndi.h>
32 #include <sys/stat.h>
33 #include <sys/modctl.h>
34 #include <sys/types.h>
35 #include <sys/cpuvar.h>
36 #include <sys/cmn_err.h>
37 #include <sys/kmem.h>
38 #include <sys/cred.h>
39 #include <sys/ksynch.h>
40 #include <sys/rwlock.h>
41 #include <sys/pghw.h>
42 #include <sys/open.h>
43 #include <sys/policy.h>
44 #include <sys/x86_archext.h>
45 #include <sys/cpu_module.h>
46 #include <qsort.h>
47 #include <sys/pci_cfgspace.h>
48 #include <sys/mc.h>
49 #include <sys/mc_amd.h>
50 #include <sys/smbios.h>
51 #include <sys/pci.h>
52 #include <mcamd.h>
53 #include <mcamd_dimmcfg.h>
54 #include <mcamd_pcicfg.h>
55 #include <mcamd_api.h>
56 #include <sys/fm/cpu/AMD.h>
57 #include <sys/fm/smb/fmsmb.h>
58 #include <sys/fm/protocol.h>
59 #include <sys/fm/util.h>
60
61 /*
62 * Set to prevent mc-amd from attaching.
63 */
64 int mc_no_attach = 0;
65
66 /*
67 * Of the 754/939/940 packages, only socket 940 supports quadrank registered
68 * dimms. Unfortunately, no memory-controller register indicates quadrank
69 * dimm support or presence (i.e., in terms of the number of slots per
70 * cpu and chip-select lines per slot). The following may be set in
71 * /etc/system to indicate the presence of quadrank support on a motherboard.
72 *
73 * There is no need to set this for F(1207) and S1g1.
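*
* (Illustrative /etc/system sketch - assuming the usual
* "set module:variable" syntax and that the module name is mc-amd:
*	set mc-amd:mc_quadranksupport = 1
* )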
74 */
75 int mc_quadranksupport = 0;
76
77 mc_t *mc_list, *mc_last;
78 krwlock_t mc_lock;
79 int mc_hold_attached = 1;
80
81 #define MAX(m, n) ((m) >= (n) ? (m) : (n))
82 #define MIN(m, n) ((m) <= (n) ? (m) : (n))
83
84 /*
85 * The following tuneable is used to determine the DRAM scrubbing rate.
86 * The values range from 0x00-0x16 as described in the BKDG. Zero
87 * disables DRAM scrubbing. Values above zero indicate rates in descending
88 * order.
89 *
90 * The default value below is used on several Sun systems. In the future
91 * this code should assign values dynamically based on memory sizing.
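*
* (Worked example for the 0xd default below, assuming the BKDG encoding
* of one 64-byte line per 163.84us: 1GB / 64B = 16,777,216 lines, and
* 16,777,216 * 163.84us ~= 2749s - roughly the quoted 45 minutes per GB.)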
92 */
93 uint32_t mc_scrub_rate_dram = 0xd; /* 64B every 163.8 us; 1GB per 45 min */
94
95 enum {
96 MC_SCRUB_BIOSDEFAULT, /* retain system default value */
97 MC_SCRUB_FIXED, /* assign mc_scrub_rate_* values */
98 MC_SCRUB_MAX /* assign max of system and tunables */
99 } mc_scrub_policy = MC_SCRUB_MAX;
100
101 static void
102 mc_snapshot_destroy(mc_t *mc)
103 {
104 ASSERT(RW_LOCK_HELD(&mc_lock));
105
106 if (mc->mc_snapshot == NULL)
107 return;
108
109 kmem_free(mc->mc_snapshot, mc->mc_snapshotsz);
110 mc->mc_snapshot = NULL;
111 mc->mc_snapshotsz = 0;
112 mc->mc_snapshotgen++;
113 }
114
115 static int
116 mc_snapshot_update(mc_t *mc)
117 {
118 ASSERT(RW_LOCK_HELD(&mc_lock));
119
120 if (mc->mc_snapshot != NULL)
121 return (0);
122
123 if (nvlist_pack(mc->mc_nvl, &mc->mc_snapshot, &mc->mc_snapshotsz,
124 NV_ENCODE_XDR, KM_SLEEP) != 0)
125 return (-1);
126
127 return (0);
128 }
129
130 static mc_t *
131 mc_lookup_by_chipid(int chipid)
132 {
133 mc_t *mc;
134
135 ASSERT(RW_LOCK_HELD(&mc_lock));
136
137 for (mc = mc_list; mc != NULL; mc = mc->mc_next) {
138 if (mc->mc_props.mcp_num == chipid)
139 return (mc);
140 }
141
142 return (NULL);
143 }
144
145 /*
146 * Read config register pairs into the two arrays provided on the given
147 * handle and at offsets as follows:
148 *
149 * Index Array r1 offset Array r2 offset
150 * 0 r1addr r2addr
151 * 1 r1addr + incr r2addr + incr
152 * 2 r1addr + 2 * incr r2addr + 2 * incr
153 * ...
154 * n - 1 r1addr + (n - 1) * incr r2addr + (n - 1) * incr
155 *
156 * The number of registers to read into the r1 array is r1n; the number
157 * for the r2 array is r2n.
158 */
159 static void
160 mc_prop_read_pair(mc_pcicfg_hdl_t cfghdl, uint32_t *r1, off_t r1addr,
161 int r1n, uint32_t *r2, off_t r2addr, int r2n, off_t incr)
162 {
163 int i;
164
165 for (i = 0; i < MAX(r1n, r2n); i++, r1addr += incr, r2addr += incr) {
166 if (i < r1n)
167 r1[i] = mc_pcicfg_get32(cfghdl, r1addr);
168 if (i < r2n)
169 r2[i] = mc_pcicfg_get32(cfghdl, r2addr);
170 }
171 }
172
173 /*ARGSUSED*/
174 static int
175 mc_nvl_add_socket_cb(cmi_hdl_t whdl, void *arg1, void *arg2, void *arg3)
176 {
177 uint32_t skt = *((uint32_t *)arg1);
178 cmi_hdl_t *hdlp = (cmi_hdl_t *)arg2;
179
180 if (cmi_hdl_getsockettype(whdl) == skt) {
181 cmi_hdl_hold(whdl); /* short-term hold */
182 *hdlp = whdl;
183 return (CMI_HDL_WALK_DONE);
184 } else {
185 return (CMI_HDL_WALK_NEXT);
186 }
187 }
188
189 static void
190 mc_nvl_add_socket(nvlist_t *nvl, mc_t *mc)
191 {
192 cmi_hdl_t hdl = NULL;
193 const char *s;
194
195 cmi_hdl_walk(mc_nvl_add_socket_cb, (void *)&mc->mc_socket,
196 (void *)&hdl, NULL);
197 if (hdl == NULL)
198 s = "Unknown"; /* no cpu for this chipid found */
199 else
200 s = cmi_hdl_getsocketstr(hdl);
201
202 (void) nvlist_add_string(nvl, "socket", s);
203
204 if (hdl != NULL)
205 cmi_hdl_rele(hdl);
206 }
207
208 static uint32_t
209 mc_ecc_enabled(mc_t *mc)
210 {
211 x86_chiprev_t rev = mc->mc_props.mcp_rev;
212 union mcreg_nbcfg nbcfg;
213
214 MCREG_VAL32(&nbcfg) = mc->mc_cfgregs.mcr_nbcfg;
215
216 return (MC_REV_MATCH(rev, MC_F_REVS_BCDE) ?
217 MCREG_FIELD_F_preF(&nbcfg, EccEn) :
218 MCREG_FIELD_F_revFG(&nbcfg, EccEn));
219 }
220
221 static uint32_t
222 mc_ck_enabled(mc_t *mc)
223 {
224 x86_chiprev_t rev = mc->mc_props.mcp_rev;
225 union mcreg_nbcfg nbcfg;
226
227 MCREG_VAL32(&nbcfg) = mc->mc_cfgregs.mcr_nbcfg;
228
229 return (MC_REV_MATCH(rev, MC_F_REVS_BCDE) ?
230 MCREG_FIELD_F_preF(&nbcfg, ChipKillEccEn) :
231 MCREG_FIELD_F_revFG(&nbcfg, ChipKillEccEn));
232 }
233
234 static void
235 mc_nvl_add_ecctype(nvlist_t *nvl, mc_t *mc)
236 {
237 (void) nvlist_add_string(nvl, "ecc-type", mc_ecc_enabled(mc) ?
238 (mc_ck_enabled(mc) ? "ChipKill 128/16" : "Normal 64/8") : "None");
239 }
240
241 static void
242 mc_nvl_add_prop(nvlist_t *nvl, void *node, mcamd_propcode_t code, int reqval)
243 {
244 int valfound;
245 uint64_t value;
246 const char *name = mcamd_get_propname(code);
247
248 valfound = mcamd_get_numprop(NULL, (mcamd_node_t *)node, code, &value);
249
250 ASSERT(name != NULL && valfound);
251 if (name != NULL && valfound && (!reqval || value != MC_INVALNUM))
252 (void) nvlist_add_uint64(nvl, name, value);
253 }
254
255 static void
256 mc_nvl_add_cslist(nvlist_t *mcnvl, mc_t *mc)
257 {
258 mc_cs_t *mccs = mc->mc_cslist;
259 nvlist_t *cslist[MC_CHIP_NCS];
260 int nelem, i;
261
262 for (nelem = 0; mccs != NULL; mccs = mccs->mccs_next, nelem++) {
263 nvlist_t **csp = &cslist[nelem];
264 char csname[MCDCFG_CSNAMELEN];
265
266 (void) nvlist_alloc(csp, NV_UNIQUE_NAME, KM_SLEEP);
267 mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_NUM, 0);
268 mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_BASE_ADDR, 0);
269 mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_MASK, 0);
270 mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_SIZE, 0);
271
272 /*
273 * It is possible for an mc_cs_t not to have associated
274 * DIMM info if mcdcfg_lookup failed.
275 */
276 if (mccs->mccs_csl[0] != NULL) {
277 mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_CSDIMM1, 1);
278 mcdcfg_csname(mc->mc_socket, mccs->mccs_csl[0], csname,
279 sizeof (csname));
280 (void) nvlist_add_string(*csp, "dimm1-csname", csname);
281 }
282
283 if (mccs->mccs_csl[1] != NULL) {
284 mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_CSDIMM2, 1);
285 mcdcfg_csname(mc->mc_socket, mccs->mccs_csl[1], csname,
286 sizeof (csname));
287 (void) nvlist_add_string(*csp, "dimm2-csname", csname);
288 }
289 }
290
291 /* Add cslist nvlist array even if zero members */
292 (void) nvlist_add_nvlist_array(mcnvl, "cslist", cslist, nelem);
293 for (i = 0; i < nelem; i++)
294 nvlist_free(cslist[i]);
295 }
296
297 static void
298 mc_nvl_add_dimmlist(nvlist_t *mcnvl, mc_t *mc)
299 {
300 nvlist_t *dimmlist[MC_CHIP_NDIMM];
301 mc_dimm_t *mcd;
302 int nelem, i;
303
304 for (nelem = 0, mcd = mc->mc_dimmlist; mcd != NULL;
305 mcd = mcd->mcd_next, nelem++) {
306 nvlist_t **dimmp = &dimmlist[nelem];
307 uint64_t csnums[MC_CHIP_DIMMRANKMAX];
308 char csname[4][MCDCFG_CSNAMELEN];
309 char *csnamep[4];
310 int ncs = 0;
311
312 (void) nvlist_alloc(dimmp, NV_UNIQUE_NAME, KM_SLEEP);
313
314 mc_nvl_add_prop(*dimmp, mcd, MCAMD_PROP_NUM, 1);
315 mc_nvl_add_prop(*dimmp, mcd, MCAMD_PROP_SIZE, 1);
316
317 for (i = 0; i < MC_CHIP_DIMMRANKMAX; i++) {
318 if (mcd->mcd_cs[i] != NULL) {
319 csnums[ncs] =
320 mcd->mcd_cs[i]->mccs_props.csp_num;
321 mcdcfg_csname(mc->mc_socket, mcd->mcd_csl[i],
322 csname[ncs], MCDCFG_CSNAMELEN);
323 csnamep[ncs] = csname[ncs];
324 ncs++;
325 }
326 }
327
328 (void) nvlist_add_uint64_array(*dimmp, "csnums", csnums, ncs);
329 (void) nvlist_add_string_array(*dimmp, "csnames", csnamep, ncs);
330 }
331
332 /* Add dimmlist nvlist array even if zero members */
333 (void) nvlist_add_nvlist_array(mcnvl, "dimmlist", dimmlist, nelem);
334 for (i = 0; i < nelem; i++)
335 nvlist_free(dimmlist[i]);
336 }
337
338 static void
339 mc_nvl_add_htconfig(nvlist_t *mcnvl, mc_t *mc)
340 {
341 mc_cfgregs_t *mcr = &mc->mc_cfgregs;
342 union mcreg_htroute *htrp = (union mcreg_htroute *)&mcr->mcr_htroute[0];
343 union mcreg_nodeid *nip = (union mcreg_nodeid *)&mcr->mcr_htnodeid;
344 union mcreg_unitid *uip = (union mcreg_unitid *)&mcr->mcr_htunitid;
345 int ndcnt = HT_COHERENTNODES(nip);
346 uint32_t BCRte[MC_CHIP_MAXNODES];
347 uint32_t RPRte[MC_CHIP_MAXNODES];
348 uint32_t RQRte[MC_CHIP_MAXNODES];
349 nvlist_t *nvl;
350 int i;
351
352 (void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
353
354 (void) nvlist_add_uint32(nvl, "NodeId", MCREG_FIELD_CMN(nip, NodeId));
355 (void) nvlist_add_uint32(nvl, "CoherentNodes", HT_COHERENTNODES(nip));
356 (void) nvlist_add_uint32(nvl, "SbNode", MCREG_FIELD_CMN(nip, SbNode));
357 (void) nvlist_add_uint32(nvl, "LkNode", MCREG_FIELD_CMN(nip, LkNode));
358 (void) nvlist_add_uint32(nvl, "SystemCoreCount",
359 HT_SYSTEMCORECOUNT(nip));
360
361 (void) nvlist_add_uint32(nvl, "C0Unit", MCREG_FIELD_CMN(uip, C0Unit));
362 (void) nvlist_add_uint32(nvl, "C1Unit", MCREG_FIELD_CMN(uip, C1Unit));
363 (void) nvlist_add_uint32(nvl, "McUnit", MCREG_FIELD_CMN(uip, McUnit));
364 (void) nvlist_add_uint32(nvl, "HbUnit", MCREG_FIELD_CMN(uip, HbUnit));
365 (void) nvlist_add_uint32(nvl, "SbLink", MCREG_FIELD_CMN(uip, SbLink));
366
367 if (ndcnt <= MC_CHIP_MAXNODES) {
368 for (i = 0; i < ndcnt; i++, htrp++) {
369 BCRte[i] = MCREG_FIELD_CMN(htrp, BCRte);
370 RPRte[i] = MCREG_FIELD_CMN(htrp, RPRte);
371 RQRte[i] = MCREG_FIELD_CMN(htrp, RQRte);
372 }
373
374 (void) nvlist_add_uint32_array(nvl, "BroadcastRoutes",
375 &BCRte[0], ndcnt);
376 (void) nvlist_add_uint32_array(nvl, "ResponseRoutes",
377 &RPRte[0], ndcnt);
378 (void) nvlist_add_uint32_array(nvl, "RequestRoutes",
379 &RQRte[0], ndcnt);
380 }
381
382 (void) nvlist_add_nvlist(mcnvl, "htconfig", nvl);
383 nvlist_free(nvl);
384 }
385
386 static nvlist_t *
387 mc_nvl_create(mc_t *mc)
388 {
389 nvlist_t *mcnvl;
390
391 (void) nvlist_alloc(&mcnvl, NV_UNIQUE_NAME, KM_SLEEP);
392
393 /*
394 * Since this nvlist is used in populating the topo tree, changes
395 * made here may propagate through to changed property names etc.
396 * in the topo tree. Some properties in the topo tree will be
397 * contracted via ARC, so be careful what you change here.
398 */
399 (void) nvlist_add_uint8(mcnvl, MC_NVLIST_VERSTR, MC_NVLIST_VERS1);
400
401 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_NUM, 0);
402 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_REV, 0);
403 (void) nvlist_add_string(mcnvl, "revname", mc->mc_revname);
404 mc_nvl_add_socket(mcnvl, mc);
405 mc_nvl_add_ecctype(mcnvl, mc);
406
407 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_BASE_ADDR, 0);
408 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_LIM_ADDR, 0);
409 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_ILEN, 0);
410 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_ILSEL, 0);
411 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_CSINTLVFCTR, 0);
412 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_DRAMHOLE_SIZE, 0);
413 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_ACCESS_WIDTH, 0);
414 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_CSBANKMAPREG, 0);
415 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_BANKSWZL, 0);
416 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_MOD64MUX, 0);
417 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_SPARECS, 1);
418 mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_BADCS, 1);
419
420 mc_nvl_add_cslist(mcnvl, mc);
421 mc_nvl_add_dimmlist(mcnvl, mc);
422 mc_nvl_add_htconfig(mcnvl, mc);
423
424 return (mcnvl);
425 }
426
427 /*
428 * Link a dimm to its associated chip-selects and chip-select lines.
429 * Total the size of all ranks of this dimm.
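*
* (In 128-bit mode each chip-select spans a pair of dimms, one per
* channel, so a rank contributes only csp_size / 2 to a given dimm's
* total - the "factor" computed below.)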
430 */
431 static void
432 mc_dimm_csadd(mc_t *mc, mc_dimm_t *mcd, mc_cs_t *mccs, const mcdcfg_csl_t *csl)
433 {
434 int factor = (mc->mc_props.mcp_accwidth == 128) ? 2 : 1;
435 uint64_t sz = 0;
436 int i;
437
438 /* Skip to first unused rank slot */
439 for (i = 0; i < MC_CHIP_DIMMRANKMAX; i++) {
440 if (mcd->mcd_cs[i] == NULL) {
441 mcd->mcd_cs[i] = mccs;
442 mcd->mcd_csl[i] = csl;
443 sz += mccs->mccs_props.csp_size / factor;
444 break;
445 } else {
446 sz += mcd->mcd_cs[i]->mccs_props.csp_size / factor;
447 }
448 }
449
450 ASSERT(i != MC_CHIP_DIMMRANKMAX);
451
452 mcd->mcd_size = sz;
453 }
454
455 /*
456 * Create a dimm structure and call to link it to its associated chip-selects.
457 */
458 static mc_dimm_t *
459 mc_dimm_create(mc_t *mc, uint_t num)
460 {
461 mc_dimm_t *mcd = kmem_zalloc(sizeof (mc_dimm_t), KM_SLEEP);
462
463 mcd->mcd_hdr.mch_type = MC_NT_DIMM;
464 mcd->mcd_mc = mc;
465 mcd->mcd_num = num;
466
467 return (mcd);
468 }
469
470 /*
471 * The chip-select structure includes an array of dimms associated with
472 * that chip-select. This function fills that array, and also builds
473 * the list of all dimms on this memory controller mc_dimmlist. The
474 * caller has filled a structure with all there is to know about the
475 * associated dimm(s).
476 */
477 static void
478 mc_csdimms_create(mc_t *mc, mc_cs_t *mccs, mcdcfg_rslt_t *rsltp)
479 {
480 mc_dimm_t *found[MC_CHIP_DIMMPERCS];
481 mc_dimm_t *mcd;
482 int nfound = 0;
483 int i;
484
485 /*
486 * Has some other chip-select already created this dimm or dimms?
487 * If so then link to the dimm(s) from the mccs_dimm array,
488 * record their topo numbers in the csp_dimmnums array, and link
489 * the dimm(s) to the additional chip-select.
490 */
491 for (mcd = mc->mc_dimmlist; mcd != NULL; mcd = mcd->mcd_next) {
492 for (i = 0; i < rsltp->ndimm; i++) {
493 if (mcd->mcd_num == rsltp->dimm[i].toponum)
494 found[nfound++] = mcd;
495 }
496 }
497 ASSERT(nfound == 0 || nfound == rsltp->ndimm);
498
499 for (i = 0; i < rsltp->ndimm; i++) {
500 if (nfound == 0) {
501 mcd = mc_dimm_create(mc, rsltp->dimm[i].toponum);
502 if (mc->mc_dimmlist == NULL)
503 mc->mc_dimmlist = mcd;
504 else
505 mc->mc_dimmlast->mcd_next = mcd;
506 mc->mc_dimmlast = mcd;
507 } else {
508 mcd = found[i];
509 }
510
511 mccs->mccs_dimm[i] = mcd;
512 mccs->mccs_csl[i] = rsltp->dimm[i].cslp;
513 mccs->mccs_props.csp_dimmnums[i] = mcd->mcd_num;
514 mc_dimm_csadd(mc, mcd, mccs, rsltp->dimm[i].cslp);
515
516 }
517
518 /* The rank number is constant across all constituent dimm(s) */
519 mccs->mccs_props.csp_dimmrank = rsltp->dimm[0].cslp->csl_rank;
520 }
521
522 /*
523 * mc_dimmlist_create is called after we have discovered all enabled
524 * (and spare or testfailed on revs F and G) chip-selects on the
525 * given memory controller. For each chip-select we must derive
526 * the associated dimms, remembering that a chip-select csbase/csmask
527 * pair may be associated with up to 2 chip-select lines (in 128 bit mode)
528 * and that any one dimm may be associated with 1, 2, or 4 chip-selects
529 * depending on whether it is single, dual or quadrank.
530 */
531 static void
532 mc_dimmlist_create(mc_t *mc)
533 {
534 union mcreg_dramcfg_hi *drcfghip =
535 (union mcreg_dramcfg_hi *)(&mc->mc_cfgregs.mcr_dramcfghi);
536 mc_props_t *mcp = &mc->mc_props;
537 x86_chiprev_t rev = mcp->mcp_rev;
538 mc_cs_t *mccs;
539 int r4 = 0, s4 = 0;
540
541 /*
542 * Are we dealing with quadrank registered dimms?
543 *
544 * For socket 940 we can't tell and we'll assume we're not.
545 * This can be overridden by the admin in /etc/system by setting
546 * mc_quadranksupport nonzero. A possible optimisation in systems
547 * that export an SMBIOS table would be to count the number of
548 * dimm slots per cpu - more than 4 would indicate no quadrank support,
549 * while 4 or fewer would indicate that any enabled upper chip-select
550 * implies that a quadrank dimm is present.
551 *
552 * For socket F(1207) we can check a bit in the dram config high reg.
553 *
554 * Other socket types do not support registered dimms.
555 */
556 if (mc->mc_socket == X86_SOCKET_940)
557 r4 = mc_quadranksupport != 0;
558 else if (mc->mc_socket == X86_SOCKET_F1207)
559 r4 = MCREG_FIELD_F_revFG(drcfghip, FourRankRDimm);
560
561 /*
562 * Are we dealing with quadrank SO-DIMMs? These are supported
563 * in AM2 and S1g1 packages only, but in all rev F/G cases we
564 * can detect their presence via a bit in the dram config high reg.
565 */
566 if (MC_REV_MATCH(rev, MC_F_REVS_FG))
567 s4 = MCREG_FIELD_F_revFG(drcfghip, FourRankSODimm);
568
569 for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
570 mcdcfg_rslt_t rslt;
571
572 /*
573 * If lookup fails we will not create dimm structures for
574 * this chip-select. In the mc_cs_t we will have both
575 * csp_dimmnum members set to MC_INVALNUM and patounum
576 * code will see from those that we do not have dimm info
577 * for this chip-select.
578 */
579 if (mcdcfg_lookup(rev, mcp->mcp_mod64mux, mcp->mcp_accwidth,
580 mccs->mccs_props.csp_num, mc->mc_socket,
581 r4, s4, &rslt) < 0)
582 continue;
583
584 mc_csdimms_create(mc, mccs, &rslt);
585 }
586 }
587
588 static mc_cs_t *
589 mc_cs_create(mc_t *mc, uint_t num, uint64_t base, uint64_t mask, size_t sz,
590 int csbe, int spare, int testfail)
591 {
592 mc_cs_t *mccs = kmem_zalloc(sizeof (mc_cs_t), KM_SLEEP);
593 mccs_props_t *csp = &mccs->mccs_props;
594 int i;
595
596 mccs->mccs_hdr.mch_type = MC_NT_CS;
597 mccs->mccs_mc = mc;
598 csp->csp_num = num;
599 csp->csp_base = base;
600 csp->csp_mask = mask;
601 csp->csp_size = sz;
602 csp->csp_csbe = csbe;
603 csp->csp_spare = spare;
604 csp->csp_testfail = testfail;
605
606 for (i = 0; i < MC_CHIP_DIMMPERCS; i++)
607 csp->csp_dimmnums[i] = MC_INVALNUM;
608
609 if (spare)
610 mc->mc_props.mcp_sparecs = num;
611
612 return (mccs);
613 }
614
615 /*
616 * For any cs# of this mc marked TestFail generate an ereport with
617 * resource identifying the associated dimm(s).
618 */
619 static void
620 mc_report_testfails(mc_t *mc)
621 {
622 mc_unum_t unum;
623 mc_cs_t *mccs;
624 int i;
625
626 for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
627 if (mccs->mccs_props.csp_testfail) {
628 unum.unum_board = 0;
629 unum.unum_chip = mc->mc_props.mcp_num;
630 unum.unum_mc = 0;
631 unum.unum_chan = MC_INVALNUM;
632 unum.unum_cs = mccs->mccs_props.csp_num;
633 unum.unum_rank = mccs->mccs_props.csp_dimmrank;
634 unum.unum_offset = MCAMD_RC_INVALID_OFFSET;
635 for (i = 0; i < MC_CHIP_DIMMPERCS; i++)
636 unum.unum_dimms[i] = MC_INVALNUM;
637
638 mcamd_ereport_post(mc, FM_EREPORT_CPU_AMD_MC_TESTFAIL,
639 &unum,
640 FM_EREPORT_PAYLOAD_FLAGS_CPU_AMD_MC_TESTFAIL);
641 }
642 }
643 }
644
645 /*
646 * Function 0 - HyperTransport Technology Configuration
647 */
648 static void
649 mc_mkprops_htcfg(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
650 {
651 union mcreg_nodeid nodeid;
652 off_t offset;
653 int i;
654
655 mc->mc_cfgregs.mcr_htnodeid = MCREG_VAL32(&nodeid) =
656 mc_pcicfg_get32(cfghdl, MC_HT_REG_NODEID);
657
658 mc->mc_cfgregs.mcr_htunitid = mc_pcicfg_get32(cfghdl, MC_HT_REG_UNITID);
659
660 for (i = 0, offset = MC_HT_REG_RTBL_NODE_0;
661 i < HT_COHERENTNODES(&nodeid);
662 i++, offset += MC_HT_REG_RTBL_INCR)
663 mc->mc_cfgregs.mcr_htroute[i] = mc_pcicfg_get32(cfghdl, offset);
664 }
665
666 /*
667 * Function 1 Configuration - Address Map (see BKDG 3.4.4 DRAM Address Map)
668 *
669 * Read the Function 1 Address Map for each potential DRAM node. The Base
670 * Address for a node gives the starting system address mapped at that node,
671 * and the limit gives the last valid address mapped at that node. Regions for
672 * different nodes should not overlap, unless node-interleaving is enabled.
673 * The base register also indicates the node-interleaving settings (IntlvEn).
674 * The limit register includes IntlvSel which determines which 4K blocks will
675 * be routed to this node and the destination node ID for addresses that fall
676 * within the [base, limit] range - this must match the pair number.
677 */
678 static void
679 mc_mkprops_addrmap(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
680 {
681 union mcreg_drambase basereg;
682 union mcreg_dramlimit limreg;
683 mc_props_t *mcp = &mc->mc_props;
684 mc_cfgregs_t *mcr = &mc->mc_cfgregs;
685 union mcreg_dramhole hole;
686 int nodeid = mc->mc_props.mcp_num;
687
688 mcr->mcr_drambase = MCREG_VAL32(&basereg) = mc_pcicfg_get32(cfghdl,
689 MC_AM_REG_DRAMBASE_0 + nodeid * MC_AM_REG_DRAM_INCR);
690
691 mcr->mcr_dramlimit = MCREG_VAL32(&limreg) = mc_pcicfg_get32(cfghdl,
692 MC_AM_REG_DRAMLIM_0 + nodeid * MC_AM_REG_DRAM_INCR);
693
694 /*
695 * Derive some "cooked" properties for nodes that have a range of
696 * physical addresses that are read or write enabled and for which
697 * the DstNode matches the node we are attaching.
698 */
699 if (MCREG_FIELD_CMN(&limreg, DRAMLimiti) != 0 &&
700 MCREG_FIELD_CMN(&limreg, DstNode) == nodeid &&
701 (MCREG_FIELD_CMN(&basereg, WE) || MCREG_FIELD_CMN(&basereg, RE))) {
702 mcp->mcp_base = MC_DRAMBASE(&basereg);
703 mcp->mcp_lim = MC_DRAMLIM(&limreg);
704 mcp->mcp_ilen = MCREG_FIELD_CMN(&basereg, IntlvEn);
705 mcp->mcp_ilsel = MCREG_FIELD_CMN(&limreg, IntlvSel);
706 }
707
708 /*
709 * The Function 1 DRAM Hole Address Register tells us which node(s)
710 * own the DRAM space that is hoisted above 4GB, together with the
711 * hole base and offset for this node. This was introduced in
712 * revision E.
713 */
714 if (MC_REV_ATLEAST(mc->mc_props.mcp_rev, MC_F_REV_E)) {
715 mcr->mcr_dramhole = MCREG_VAL32(&hole) =
716 mc_pcicfg_get32(cfghdl, MC_AM_REG_HOLEADDR);
717
718 if (MCREG_FIELD_CMN(&hole, DramHoleValid))
719 mcp->mcp_dramhole_size = MC_DRAMHOLE_SIZE(&hole);
720 }
721 }
722
723 /*
724 * Read some function 3 parameters via PCI Mechanism 1 accesses (which
725 * will serialize any NB accesses).
726 */
727 static void
728 mc_getmiscctl(mc_t *mc)
729 {
730 x86_chiprev_t rev = mc->mc_props.mcp_rev;
731 union mcreg_nbcfg nbcfg;
732 union mcreg_sparectl sparectl;
733
734 mc->mc_cfgregs.mcr_nbcfg = MCREG_VAL32(&nbcfg) =
735 mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_NBCFG);
736
737 if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
738 mc->mc_cfgregs.mcr_sparectl = MCREG_VAL32(&sparectl) =
739 mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL,
740 MC_CTL_REG_SPARECTL);
741
742 if (MCREG_FIELD_F_revFG(&sparectl, SwapDone)) {
743 mc->mc_props.mcp_badcs =
744 MCREG_FIELD_F_revFG(&sparectl, BadDramCs);
745 }
746 }
747 }
748
749 static int
750 csbasecmp(mc_cs_t **csapp, mc_cs_t **csbpp)
751 {
752 uint64_t basea = (*csapp)->mccs_props.csp_base;
753 uint64_t baseb = (*csbpp)->mccs_props.csp_base;
754
755 if (basea == baseb)
756 return (0);
757 else if (basea < baseb)
758 return (-1);
759 else
760 return (1);
761 }
762
763 /*
764 * The following are for use in simulating TestFail for a chip-select
765 * without poking at the hardware (which tends to get upset if you do
766 * since the BIOS needs to restart to map a failed cs out). For internal
767 * testing only! Note that setting these does not give the full experience -
768 * the selected chip-select *is* enabled and can give errors etc., and the
769 * patounum logic will get confused.
770 */
771 int testfail_mcnum = -1;
772 int testfail_csnum = -1;
773
774 /*
775 * Function 2 configuration - DRAM Controller
776 */
777 static void
778 mc_mkprops_dramctl(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
779 {
780 union mcreg_csbase base[MC_CHIP_NCS];
781 union mcreg_csmask mask[MC_CHIP_NCS];
782 union mcreg_dramcfg_lo drcfg_lo;
783 union mcreg_dramcfg_hi drcfg_hi;
784 union mcreg_drammisc drmisc;
785 union mcreg_bankaddrmap baddrmap;
786 mc_props_t *mcp = &mc->mc_props;
787 mc_cfgregs_t *mcr = &mc->mc_cfgregs;
788 int maskdivisor;
789 int wide = 0;
790 x86_chiprev_t rev = mc->mc_props.mcp_rev;
791 int i;
792 mcamd_hdl_t hdl;
793
794 mcamd_mkhdl(&hdl); /* to call into common code */
795
796 /*
797 * Read Function 2 DRAM Configuration High and Low registers. The High
798 * part is mostly concerned with memory clocks etc and we'll not have
799 * any use for that. The Low component tells us if ECC is enabled,
800 * if we're in 64- or 128-bit MC mode, how the upper chip-selects
801 * are mapped, which chip-select pairs are using x4 parts, etc.
802 */
803 MCREG_VAL32(&drcfg_lo) = mc_pcicfg_get32(cfghdl, MC_DC_REG_DRAMCFGLO);
804 MCREG_VAL32(&drcfg_hi) = mc_pcicfg_get32(cfghdl, MC_DC_REG_DRAMCFGHI);
805 mcr->mcr_dramcfglo = MCREG_VAL32(&drcfg_lo);
806 mcr->mcr_dramcfghi = MCREG_VAL32(&drcfg_hi);
807
808 /*
809 * Note the DRAM controller width. The 64/128 bit is in a different
810 * bit position for revision F and G.
811 */
812 if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
813 wide = MCREG_FIELD_F_revFG(&drcfg_lo, Width128);
814 } else {
815 wide = MCREG_FIELD_F_preF(&drcfg_lo, Width128);
816 }
817 mcp->mcp_accwidth = wide ? 128 : 64;
818
819 /*
820 * Read the Function 2 DRAM Controller Miscellaneous Register for those
821 * revs that support it. This includes the Mod64Mux indication on
822 * these revs - for rev E it is in DRAM config low.
823 */
824 if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
825 mcr->mcr_drammisc = MCREG_VAL32(&drmisc) =
826 mc_pcicfg_get32(cfghdl, MC_DC_REG_DRAMMISC);
827 mcp->mcp_mod64mux = MCREG_FIELD_F_revFG(&drmisc, Mod64Mux);
828 } else if (MC_REV_MATCH(rev, MC_F_REV_E)) {
829 mcp->mcp_mod64mux = MCREG_FIELD_F_preF(&drcfg_lo, Mod64BitMux);
830 }
831
832 /*
833 * Read Function 2 DRAM Bank Address Mapping. This encodes the
834 * type of DIMM module in use for each chip-select pair.
835 * Prior to revision F it also tells us whether BankSwizzle mode
836 * is enabled - in rev F that moved to the dram config high register.
837 */
838 mcp->mcp_csbankmapreg = MCREG_VAL32(&baddrmap) =
839 mc_pcicfg_get32(cfghdl, MC_DC_REG_BANKADDRMAP);
840
841 /*
842 * Determine whether bank swizzle mode is active. Bank swizzling was
843 * introduced as an option in rev E, but the bit that indicates it
844 * is enabled has moved in revs F/G.
845 */
846 if (MC_REV_MATCH(rev, MC_F_REV_E)) {
847 mcp->mcp_bnkswzl =
848 MCREG_FIELD_F_preF(&baddrmap, BankSwizzleMode);
849 } else if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
850 mcp->mcp_bnkswzl = MCREG_FIELD_F_revFG(&drcfg_hi,
851 BankSwizzleMode);
852 }
853
854 /*
855 * Read the DRAM CS Base and DRAM CS Mask registers. Revisions prior
856 * to F have an equal number of base and mask registers; revision F
857 * has twice as many base registers as masks.
858 */
859 maskdivisor = MC_REV_MATCH(rev, MC_F_REVS_FG) ? 2 : 1;
860
861 mc_prop_read_pair(cfghdl,
862 (uint32_t *)base, MC_DC_REG_CSBASE_0, MC_CHIP_NCS,
863 (uint32_t *)mask, MC_DC_REG_CSMASK_0, MC_CHIP_NCS / maskdivisor,
864 MC_DC_REG_CS_INCR);
865
866 /*
867 * Create a cs node for each enabled chip-select as well as
868 * any appointed online spare chip-selects and for any that have
869 * failed test.
870 */
871 for (i = 0; i < MC_CHIP_NCS; i++) {
872 mc_cs_t *mccs;
873 uint64_t csbase, csmask;
874 size_t sz;
875 int csbe, spare, testfail;
876
877 if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
878 csbe = MCREG_FIELD_F_revFG(&base[i], CSEnable);
879 spare = MCREG_FIELD_F_revFG(&base[i], Spare);
880 testfail = MCREG_FIELD_F_revFG(&base[i], TestFail);
881 } else {
882 csbe = MCREG_FIELD_F_preF(&base[i], CSEnable);
883 spare = 0;
884 testfail = 0;
885 }
886
887 /* Testing hook */
888 if (testfail_mcnum != -1 && testfail_csnum != -1 &&
889 mcp->mcp_num == testfail_mcnum && i == testfail_csnum) {
890 csbe = spare = 0;
891 testfail = 1;
892 cmn_err(CE_NOTE, "Pretending MC %d CS %d failed test",
893 testfail_mcnum, testfail_csnum);
894 }
895
896 /*
897 * If the chip-select is not enabled then skip it unless
898 * it is a designated online spare or is marked with TestFail.
899 */
900 if (!csbe && !(spare || testfail))
901 continue;
902
903 /*
904 * For an enabled or spare chip-select the Bank Address Mapping
905 * register will be valid as will the chip-select mask. The
906 * base will not be valid but we'll read and store it anyway.
907 * We will not know whether the spare is already swapped in
908 * until MC function 3 attaches.
909 */
910 if (csbe || spare) {
911 if (mcamd_cs_size(&hdl, (mcamd_node_t *)mc, i, &sz) < 0)
912 continue;
913 csbase = MC_CSBASE(&base[i], rev);
914 csmask = MC_CSMASK(&mask[i / maskdivisor], rev);
915 } else {
916 sz = 0;
917 csbase = csmask = 0;
918 }
919
920 mccs = mc_cs_create(mc, i, csbase, csmask, sz,
921 csbe, spare, testfail);
922
923 if (mc->mc_cslist == NULL)
924 mc->mc_cslist = mccs;
925 else
926 mc->mc_cslast->mccs_next = mccs;
927 mc->mc_cslast = mccs;
928
929 mccs->mccs_cfgregs.csr_csbase = MCREG_VAL32(&base[i]);
930 mccs->mccs_cfgregs.csr_csmask =
931 MCREG_VAL32(&mask[i / maskdivisor]);
932
933 /*
934 * Check for cs bank interleaving - some bits clear in the
935 * lower mask. All banks must/will have the same lomask bits
936 * if cs interleaving is active.
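*
* (E.g., two clear bits in the lomask bit range yield
* mcp_csintlvfctr = 1 << 2, i.e., 4-way chip-select interleave.)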
937 */
938 if (csbe && !mcp->mcp_csintlvfctr) {
939 int bitno, ibits = 0;
940 for (bitno = MC_CSMASKLO_LOBIT(rev);
941 bitno <= MC_CSMASKLO_HIBIT(rev); bitno++) {
942 if (!(csmask & (1 << bitno)))
943 ibits++;
944 }
945 mcp->mcp_csintlvfctr = 1 << ibits;
946 }
947 }
948
949 /*
950 * If there is no chip-select interleave on this node determine
951 * whether the chip-select ranks are contiguous or if there
952 * is a hole.
953 */
954 if (mcp->mcp_csintlvfctr == 1) {
955 mc_cs_t *csp[MC_CHIP_NCS];
956 mc_cs_t *mccs;
957 int ncsbe = 0;
958
959 for (mccs = mc->mc_cslist; mccs != NULL;
960 mccs = mccs->mccs_next) {
961 if (mccs->mccs_props.csp_csbe)
962 csp[ncsbe++] = mccs;
963 }
964
965 if (ncsbe != 0) {
966 qsort((void *)csp, ncsbe, sizeof (mc_cs_t *),
967 (int (*)(const void *, const void *))csbasecmp);
968
969 for (i = 1; i < ncsbe; i++) {
970 if (csp[i]->mccs_props.csp_base !=
971 csp[i - 1]->mccs_props.csp_base +
972 csp[i - 1]->mccs_props.csp_size)
973 mc->mc_csdiscontig = 1;
974 }
975 }
976 }
977
978
979 /*
980 * Since we do not attach to MC function 3 go ahead and read some
981 * config parameters from it now.
982 */
983 mc_getmiscctl(mc);
984
985 /*
986 * Now that we have discovered all enabled/spare/testfail chip-selects
987 * we divine the associated DIMM configuration.
988 */
989 mc_dimmlist_create(mc);
990 }
991
992 typedef struct mc_bind_map {
993 const char *bm_bindnm; /* attachment binding name */
994 enum mc_funcnum bm_func; /* PCI config space function number for bind */
995 const char *bm_model; /* value for device node model property */
996 void (*bm_mkprops)(mc_pcicfg_hdl_t, mc_t *);
997 } mc_bind_map_t;
998
999 /*
1000 * Do not attach to MC function 3 - agpgart already attaches to that.
1001 * Function 3 may be a good candidate for a nexus driver to fan it out
1002 * into virtual devices by functionality. We will use pci_mech1_getl
1003 * to retrieve the function 3 parameters we require.
1004 */
1005
1006 static const mc_bind_map_t mc_bind_map[] = {
1007 { MC_FUNC_HTCONFIG_BINDNM, MC_FUNC_HTCONFIG,
1008 "AMD Memory Controller (HT Configuration)", mc_mkprops_htcfg },
1009 { MC_FUNC_ADDRMAP_BINDNM, MC_FUNC_ADDRMAP,
1010 "AMD Memory Controller (Address Map)", mc_mkprops_addrmap },
1011 { MC_FUNC_DRAMCTL_BINDNM, MC_FUNC_DRAMCTL,
1012 "AMD Memory Controller (DRAM Controller & HT Trace)",
1013 mc_mkprops_dramctl },
1014 NULL
1015 };
1016
1017 /*ARGSUSED*/
1018 static int
1019 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
1020 {
1021 if (otyp != OTYP_CHR)
1022 return (EINVAL);
1023
1024 rw_enter(&mc_lock, RW_READER);
1025 if (mc_lookup_by_chipid(getminor(*devp)) == NULL) {
1026 rw_exit(&mc_lock);
1027 return (EINVAL);
1028 }
1029 rw_exit(&mc_lock);
1030
1031 return (0);
1032 }
1033
1034 /*ARGSUSED*/
1035 static int
1036 mc_close(dev_t dev, int flag, int otyp, cred_t *credp)
1037 {
1038 return (0);
1039 }
1040
1041 /*
1042 * Enable swap from chip-select csnum to the spare chip-select on this
1043 * memory controller (if any).
1044 */
1045
1046 int mc_swapdonetime = 30; /* max number of seconds to wait for SwapDone */
1047
1048 static int
1049 mc_onlinespare(mc_t *mc, int csnum)
1050 {
1051 mc_props_t *mcp = &mc->mc_props;
1052 union mcreg_sparectl sparectl;
1053 union mcreg_scrubctl scrubctl;
1054 mc_cs_t *mccs;
1055 hrtime_t tmax;
1056 int i = 0;
1057
1058 ASSERT(RW_WRITE_HELD(&mc_lock));
1059
1060 if (!MC_REV_MATCH(mcp->mcp_rev, MC_F_REVS_FG))
1061 return (ENOTSUP); /* MC rev does not offer online spare */
1062 else if (mcp->mcp_sparecs == MC_INVALNUM)
1063 return (ENODEV); /* Supported, but no spare configured */
1064 else if (mcp->mcp_badcs != MC_INVALNUM)
1065 return (EBUSY); /* Spare already swapped in */
1066 else if (csnum == mcp->mcp_sparecs)
1067 return (EINVAL); /* Can't spare the spare! */
1068
1069 for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
1070 if (mccs->mccs_props.csp_num == csnum)
1071 break;
1072 }
1073 if (mccs == NULL)
1074 return (EINVAL); /* nominated bad CS does not exist */
1075
1076 /*
1077 * If the DRAM Scrubber is not enabled then the swap cannot succeed.
1078 */
1079 MCREG_VAL32(&scrubctl) = mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL,
1080 MC_CTL_REG_SCRUBCTL);
1081 if (MCREG_FIELD_CMN(&scrubctl, DramScrub) == 0)
1082 return (ENODEV); /* DRAM scrubber not enabled */
1083
1084 /*
1085 * Read the Online Spare Control Register again, just in case our
1086 * state does not reflect reality.
1087 */
1088 MCREG_VAL32(&sparectl) = mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL,
1089 MC_CTL_REG_SPARECTL);
1090
1091 if (MCREG_FIELD_F_revFG(&sparectl, SwapDone))
1092 return (EBUSY);
1093
1094 /* Write to the BadDramCs field */
1095 MCREG_FIELD_F_revFG(&sparectl, BadDramCs) = csnum;
1096 mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL,
1097 MCREG_VAL32(&sparectl));
1098
1099 /* And request that the swap to the spare start */
1100 MCREG_FIELD_F_revFG(&sparectl, SwapEn) = 1;
1101 mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL,
1102 MCREG_VAL32(&sparectl));
1103
1104 /*
1105 * Poll for SwapDone - we have disabled notification by interrupt.
1106 * Swap takes "several CPU cycles, depending on the DRAM speed, but
1107 * is performed in the background" (Family 0Fh Bios Porting Guide).
1108 * We're in a slow ioctl path so there is no harm in waiting around
1109 * a bit - consumers of the ioctl must be aware that it may take
1110 * a moment. We will poll for up to mc_swapdonetime seconds,
1111 * limiting that to 120s.
1112 *
1113 * The swap is performed by the DRAM scrubber (which must be enabled)
1114 * whose scrub rate is accelerated for the duration of the swap.
1115 * The maximum swap rate is 40.0ns per 64 bytes, so the maximum
1116 * supported cs size of 16GB would take 10.7s at that max rate
1117 * of 25000000 scrubs/second.
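*
* (Check: 16GB / 64B = 268,435,456 lines, and 268,435,456 / 25,000,000
* lines/s ~= 10.7s - comfortably within the 30s default and 120s cap.)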
1118 */
1119 tmax = gethrtime() + MIN(mc_swapdonetime, 120) * 1000000000ULL;
1120 do {
1121 if (i++ < 20)
1122 delay(drv_usectohz(100000)); /* 0.1s for up to 2s */
1123 else
1124 delay(drv_usectohz(500000)); /* 0.5s */
1125
1126 MCREG_VAL32(&sparectl) = mc_pcicfg_get32_nohdl(mc,
1127 MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL);
1128 } while (!MCREG_FIELD_F_revFG(&sparectl, SwapDone) &&
1129 gethrtime() < tmax);
1130
1131 if (!MCREG_FIELD_F_revFG(&sparectl, SwapDone))
1132 return (ETIME); /* Operation timed out */
1133
1134 mcp->mcp_badcs = csnum;
1135 mc->mc_cfgregs.mcr_sparectl = MCREG_VAL32(&sparectl);
1136 mc->mc_spareswaptime = gethrtime();
1137
1138 return (0);
1139 }
1140
1141 /*ARGSUSED*/
1142 static int
1143 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
1144 {
1145 int rc = 0;
1146 mc_t *mc;
1147
1148 if (cmd != MC_IOC_SNAPSHOT_INFO && cmd != MC_IOC_SNAPSHOT &&
1149 cmd != MC_IOC_ONLINESPARE_EN)
1150 return (EINVAL);
1151
1152 rw_enter(&mc_lock, RW_READER);
1153
1154 if ((mc = mc_lookup_by_chipid(getminor(dev))) == NULL) {
1155 rw_exit(&mc_lock);
1156 return (EINVAL);
1157 }
1158
1159 switch (cmd) {
1160 case MC_IOC_SNAPSHOT_INFO: {
1161 mc_snapshot_info_t mcs;
1162
1163 if (mc_snapshot_update(mc) < 0) {
1164 rw_exit(&mc_lock);
1165 return (EIO);
1166 }
1167
1168 mcs.mcs_size = mc->mc_snapshotsz;
1169 mcs.mcs_gen = mc->mc_snapshotgen;
1170
1171 if (ddi_copyout(&mcs, (void *)arg, sizeof (mc_snapshot_info_t),
1172 mode) < 0)
1173 rc = EFAULT;
1174 break;
1175 }
1176
1177 case MC_IOC_SNAPSHOT:
1178 if (mc_snapshot_update(mc) < 0) {
1179 rw_exit(&mc_lock);
1180 return (EIO);
1181 }
1182
1183 if (ddi_copyout(mc->mc_snapshot, (void *)arg, mc->mc_snapshotsz,
1184 mode) < 0)
1185 rc = EFAULT;
1186 break;
1187
1188 case MC_IOC_ONLINESPARE_EN:
1189 if (drv_priv(credp) != 0) {
1190 rw_exit(&mc_lock);
1191 return (EPERM);
1192 }
1193
1194 if (!rw_tryupgrade(&mc_lock)) {
1195 rw_exit(&mc_lock);
1196 return (EAGAIN);
1197 }
1198
1199 if ((rc = mc_onlinespare(mc, (int)arg)) == 0) {
1200 mc_snapshot_destroy(mc);
1201 nvlist_free(mc->mc_nvl);
1202 mc->mc_nvl = mc_nvl_create(mc);
1203 }
1204
1205 break;
1206 }
1207
1208 rw_exit(&mc_lock);
1209
1210 return (rc);
1211 }
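
/*
* (Consumer sketch: a userland client would issue MC_IOC_SNAPSHOT_INFO
* to learn mcs_size, allocate a buffer of that size, fill it with
* MC_IOC_SNAPSHOT, and then unpack it with nvlist_unpack() - the
* snapshot is an XDR-encoded nvlist, per mc_snapshot_update() above.)
*/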
1212
1213 static struct cb_ops mc_cb_ops = {
1214 mc_open,
1215 mc_close,
1216 nodev, /* not a block driver */
1217 nodev, /* no print routine */
1218 nodev, /* no dump routine */
1219 nodev, /* no read routine */
1220 nodev, /* no write routine */
1221 mc_ioctl,
1222 nodev, /* no devmap routine */
1223 nodev, /* no mmap routine */
1224 nodev, /* no segmap routine */
1225 nochpoll, /* no chpoll routine */
1226 ddi_prop_op,
1227 0, /* not a STREAMS driver */
1228 D_NEW | D_MP, /* safe for multi-thread/multi-processor */
1229 };
1230
1231 /*ARGSUSED*/
1232 static int
1233 mc_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1234 {
1235 int rc = DDI_SUCCESS;
1236 mc_t *mc;
1237
1238 if (infocmd != DDI_INFO_DEVT2DEVINFO &&
1239 infocmd != DDI_INFO_DEVT2INSTANCE) {
1240 *result = NULL;
1241 return (DDI_FAILURE);
1242 }
1243
1244 rw_enter(&mc_lock, RW_READER);
1245
1246 if ((mc = mc_lookup_by_chipid(getminor((dev_t)arg))) == NULL ||
1247 mc->mc_funcs[MC_FUNC_DEVIMAP].mcf_devi == NULL) {
1248 rc = DDI_FAILURE;
1249 } else if (infocmd == DDI_INFO_DEVT2DEVINFO) {
1250 *result = mc->mc_funcs[MC_FUNC_DEVIMAP].mcf_devi;
1251 } else {
1252 *result = (void *)(uintptr_t)
1253 mc->mc_funcs[MC_FUNC_DEVIMAP].mcf_instance;
1254 }
1255
1256 rw_exit(&mc_lock);
1257
1258 return (rc);
1259 }
1260
1261 /*ARGSUSED2*/
1262 static int
1263 mc_fm_handle(dev_info_t *dip, ddi_fm_error_t *fmerr, const void *arg)
1264 {
1265 pci_ereport_post(dip, fmerr, NULL);
1266 return (fmerr->fme_status);
1267 }
1268
1269 static void
1270 mc_fm_init(dev_info_t *dip)
1271 {
1272 int fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE;
1273 ddi_fm_init(dip, &fmcap, NULL);
1274 pci_ereport_setup(dip);
1275 ddi_fm_handler_register(dip, mc_fm_handle, NULL);
1276 }
1277
1278 static void
1279 mc_read_smbios(mc_t *mc, dev_info_t *dip)
1280 {
1281
1282 uint16_t bdf;
1283 pci_regspec_t *pci_rp = NULL;
1284 uint32_t phys_hi;
1285 int m = 0;
1286 uint_t chip_inst;
1287 int rc = 0;
1288
1289 if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
1290 (caddr_t)&pci_rp, &m) == DDI_SUCCESS) {
1291 phys_hi = pci_rp->pci_phys_hi;
1292 bdf = (uint16_t)(PCI_REG_BDFR_G(phys_hi) >>
1293 PCI_REG_FUNC_SHIFT);
1294 kmem_free(pci_rp, m);
1295 pci_rp = NULL;
1296
1297 rc = fm_smb_mc_chipinst(bdf, &chip_inst);
1298 if (rc == 0) {
1299 mc->smb_chipid = chip_inst;
1300 } else {
1301 #ifdef DEBUG
1302 cmn_err(CE_NOTE, "!mc read smbios chip info failed");
1303 #endif /* DEBUG */
1304 return;
1305 }
1306 mc->smb_bboard = fm_smb_mc_bboards(bdf);
1307 #ifdef DEBUG
1308 if (mc->smb_bboard == NULL)
1309 cmn_err(CE_NOTE,
1310 "!mc read smbios base boards info failed");
1311 #endif /* DEBUG */
1312 }
1313
1314 if (pci_rp != NULL)
1315 kmem_free(pci_rp, m);
1316 }
1317
1318 /*ARGSUSED*/
1319 static int
1320 mc_create_cb(cmi_hdl_t whdl, void *arg1, void *arg2, void *arg3)
1321 {
1322 chipid_t chipid = *((chipid_t *)arg1);
1323 cmi_hdl_t *hdlp = (cmi_hdl_t *)arg2;
1324
1325 if (cmi_hdl_chipid(whdl) == chipid) {
1326 cmi_hdl_hold(whdl); /* short-term hold */
1327 *hdlp = whdl;
1328 return (CMI_HDL_WALK_DONE);
1329 } else {
1330 return (CMI_HDL_WALK_NEXT);
1331 }
1332 }
1333
1334 static mc_t *
1335 mc_create(chipid_t chipid, dev_info_t *dip)
1336 {
1337 mc_t *mc;
1338 cmi_hdl_t hdl = NULL;
1339
1340 ASSERT(RW_WRITE_HELD(&mc_lock));
1341
1342 /*
1343 * Find a handle for one of the chip's CPUs.
1344 *
1345 * We can use one of the chip's CPUs since all cores
1346 * of a chip share the same revision and socket type.
1347 */
1348 cmi_hdl_walk(mc_create_cb, (void *)&chipid, (void *)&hdl, NULL);
1349 if (hdl == NULL)
1350 return (NULL); /* no cpu for this chipid found! */
1351
1352 mc = kmem_zalloc(sizeof (mc_t), KM_SLEEP);
1353
1354 mc->mc_hdr.mch_type = MC_NT_MC;
1355 mc->mc_props.mcp_num = chipid;
1356 mc->mc_props.mcp_sparecs = MC_INVALNUM;
1357 mc->mc_props.mcp_badcs = MC_INVALNUM;
1358
1359 mc->mc_props.mcp_rev = cmi_hdl_chiprev(hdl);
1360 mc->mc_revname = cmi_hdl_chiprevstr(hdl);
1361 mc->mc_socket = cmi_hdl_getsockettype(hdl);
1362
1363 mc_read_smbios(mc, dip);
1364
1365 if (mc_list == NULL)
1366 mc_list = mc;
1367 if (mc_last != NULL)
1368 mc_last->mc_next = mc;
1369
1370 mc->mc_next = NULL;
1371 mc_last = mc;
1372
1373 cmi_hdl_rele(hdl);
1374
1375 return (mc);
1376 }
1377
1378 /*
1379 * Return the maximum scrubbing rate between r1 and r2, where r2 is extracted
1380 * from the specified 'cfg' register value using 'mask' and 'shift'. If
1381 * either value is zero, scrubbing is off for that source, so return the
1382 * other value; otherwise return the smaller (i.e., faster) of the two.
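*
* (E.g., r1 = 0xd and r2 = 0x10 yields 0xd: encodings above zero are in
* descending rate order, so the numerically smaller encoding is the
* faster rate.)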
1383 */
1384 static uint32_t
1385 mc_scrubber_max(uint32_t r1, uint32_t cfg, uint32_t mask, uint32_t shift)
1386 {
1387 uint32_t r2 = (cfg & mask) >> shift;
1388
1389 if (r1 != 0 && r2 != 0)
1390 return (MIN(r1, r2));
1391
1392 return (r1 ? r1 : r2);
1393 }
1394
1395
1396 /*
1397 * Enable the memory scrubber. We must use the mc_pcicfg_{get32,put32}_nohdl
1398 * interfaces since we do not bind to function 3.
1399 */
1400 cmi_errno_t
1401 mc_scrubber_enable(mc_t *mc)
1402 {
1403 mc_props_t *mcp = &mc->mc_props;
1404 chipid_t chipid = (chipid_t)mcp->mcp_num;
1405 x86_chiprev_t rev = (x86_chiprev_t)mcp->mcp_rev;
1406 mc_cfgregs_t *mcr = &mc->mc_cfgregs;
1407 union mcreg_scrubctl scrubctl;
1408 union mcreg_dramscrublo dalo;
1409 union mcreg_dramscrubhi dahi;
1410
1411 mcr->mcr_scrubctl = MCREG_VAL32(&scrubctl) =
1412 mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBCTL);
1413
1414 mcr->mcr_scrubaddrlo = MCREG_VAL32(&dalo) =
1415 mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_LO);
1416
1417 mcr->mcr_scrubaddrhi = MCREG_VAL32(&dahi) =
1418 mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_HI);
1419
1420 if (mc_scrub_policy == MC_SCRUB_BIOSDEFAULT)
1421 return (MCREG_FIELD_CMN(&scrubctl, DramScrub) !=
1422 AMD_NB_SCRUBCTL_RATE_NONE ?
1423 CMI_SUCCESS : CMIERR_MC_NOMEMSCRUB);
1424
1425 /*
1426 * Disable DRAM scrubbing while we fiddle.
1427 */
1428 MCREG_FIELD_CMN(&scrubctl, DramScrub) = AMD_NB_SCRUBCTL_RATE_NONE;
1429 mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBCTL,
1430 MCREG_VAL32(&scrubctl));
1431
1432 /*
1433 * Set up the DRAM Scrub Address Low and High registers for the
1434 * base address of this node, and to select scrubber redirect.
1435 */
1436 MCREG_FIELD_CMN(&dalo, ScrubReDirEn) = 1;
1437 MCREG_FIELD_CMN(&dalo, ScrubAddrLo) =
1438 AMD_NB_SCRUBADDR_MKLO(mcp->mcp_base);
1439
1440 MCREG_FIELD_CMN(&dahi, ScrubAddrHi) =
1441 AMD_NB_SCRUBADDR_MKHI(mcp->mcp_base);
1442
1443 mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_LO,
1444 MCREG_VAL32(&dalo));
1445 mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_HI,
1446 MCREG_VAL32(&dahi));
1447
1448 if (mc_scrub_rate_dram > AMD_NB_SCRUBCTL_RATE_MAX) {
1449 cmn_err(CE_WARN, "mc_scrub_rate_dram is too large; "
1450 "resetting to 0x%x\n", AMD_NB_SCRUBCTL_RATE_MAX);
1451 mc_scrub_rate_dram = AMD_NB_SCRUBCTL_RATE_MAX;
1452 }
1453
1454 switch (mc_scrub_policy) {
1455 case MC_SCRUB_FIXED:
1456 /* Use the system value checked above */
1457 break;
1458
1459 default:
1460 cmn_err(CE_WARN, "Unknown mc_scrub_policy value %d - "
1461 "using default policy of MC_SCRUB_MAX", mc_scrub_policy);
1462 /*FALLTHRU*/
1463
1464 case MC_SCRUB_MAX:
1465 mc_scrub_rate_dram = mc_scrubber_max(mc_scrub_rate_dram,
1466 mcr->mcr_scrubctl, AMD_NB_SCRUBCTL_DRAM_MASK,
1467 AMD_NB_SCRUBCTL_DRAM_SHIFT);
1468 break;
1469 }
1470
1471 /*
1472 * OPTERON_ERRATUM_99:
1473 * This erratum applies on revisions D and earlier.
1474 * This erratum also applies on revisions E and later,
1475 * if BIOS uses chip-select hoisting instead of DRAM hole
1476 * mapping.
1477 *
1478 * Do not enable the dram scrubber if the chip-select ranges
1479 * for the node are not contiguous.
1480 */
1481 if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE &&
1482 mc->mc_csdiscontig) {
1483 cmn_err(CE_CONT, "?Opteron DRAM scrubber disabled on revision "
1484 "%s chip %d because DRAM hole is present on this node",
1485 mc->mc_revname, chipid);
1486 mc_scrub_rate_dram = AMD_NB_SCRUBCTL_RATE_NONE;
1487 }
1488
1489 /*
1490 * OPTERON_ERRATUM_101:
1491 * This erratum applies on revisions D and earlier.
1492 *
1493 * If the DRAM Base Address register's IntlvEn field indicates that
1494 * node interleaving is enabled, we must disable the DRAM scrubber
1495 * and return zero to indicate that Solaris should use s/w instead.
1496 */
1497 if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE &&
1498 mcp->mcp_ilen != 0 &&
1499 !chiprev_at_least(rev, X86_CHIPREV_AMD_LEGACY_F_REV_E)) {
1500 cmn_err(CE_CONT, "?Opteron DRAM scrubber disabled on revision "
1501 "%s chip %d because DRAM memory is node-interleaved",
1502 mc->mc_revname, chipid);
1503 mc_scrub_rate_dram = AMD_NB_SCRUBCTL_RATE_NONE;
1504 }
1505
1506 if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE) {
1507 MCREG_FIELD_CMN(&scrubctl, DramScrub) = mc_scrub_rate_dram;
1508 mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBCTL,
1509 MCREG_VAL32(&scrubctl));
1510 }
1511
1512 return (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE ?
1513 CMI_SUCCESS : CMIERR_MC_NOMEMSCRUB);
1514 }
1515
1516 /*ARGSUSED*/
1517 static int
1518 mc_attach_cb(cmi_hdl_t whdl, void *arg1, void *arg2, void *arg3)
1519 {
1520 mc_t *mc = (mc_t *)arg1;
1521 mcamd_prop_t chipid = *((mcamd_prop_t *)arg2);
1522
1523 if (cmi_hdl_chipid(whdl) == chipid) {
1524 mcamd_mc_register(whdl, mc);
1525 }
1526
1527 return (CMI_HDL_WALK_NEXT);
1528 }
1529
1530 static int mc_sw_scrub_disabled = 0;
1531
1532 static int
1533 mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1534 {
1535 mc_pcicfg_hdl_t cfghdl;
1536 const mc_bind_map_t *bm;
1537 const char *bindnm;
1538 char *unitstr = NULL;
1539 enum mc_funcnum func;
1540 long unitaddr;
1541 int chipid, rc;
1542 mc_t *mc;
1543
1544 /*
1545 * This driver has no hardware state, but does
1546 * claim to have a reg property, so it will be
1547 * called on suspend. It is probably better to
1548 * make sure it doesn't get called on suspend,
1549 * but it is just as easy to make sure we just
1550 * return DDI_SUCCESS if called.
1551 */
1552 if (cmd == DDI_RESUME)
1553 return (DDI_SUCCESS);
1554
1555 if (cmd != DDI_ATTACH || mc_no_attach != 0)
1556 return (DDI_FAILURE);
1557
1558 bindnm = ddi_binding_name(dip);
1559 for (bm = mc_bind_map; bm->bm_bindnm != NULL; bm++) {
1560 if (strcmp(bindnm, bm->bm_bindnm) == 0) {
1561 func = bm->bm_func;
1562 break;
1563 }
1564 }
1565
1566 if (bm->bm_bindnm == NULL)
1567 return (DDI_FAILURE);
1568
1569 /*
1570 * We need the device number, which corresponds to the processor node
1571 * number plus 24. The node number can then be used to associate this
1572 * memory controller device with a given processor chip.
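*
* (E.g., northbridge devices begin at PCI device number 0x18, so
* unit-address 0x18 maps to chip 0, 0x19 to chip 1, and so on - the
* offset is MC_AMD_DEV_OFFSET below.)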
1573 */
1574 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
1575 DDI_PROP_DONTPASS, "unit-address", &unitstr) != DDI_PROP_SUCCESS) {
1576 cmn_err(CE_WARN, "failed to find unit-address for %s", bindnm);
1577 return (DDI_FAILURE);
1578 }
1579
1580 rc = ddi_strtol(unitstr, NULL, 16, &unitaddr);
1581 ASSERT(rc == 0 && unitaddr >= MC_AMD_DEV_OFFSET);
1582
1583 if (rc != 0 || unitaddr < MC_AMD_DEV_OFFSET) {
1584 cmn_err(CE_WARN, "failed to parse unit address %s for %s\n",
1585 unitstr, bindnm);
1586 ddi_prop_free(unitstr);
1587 return (DDI_FAILURE);
1588 }
1589 ddi_prop_free(unitstr);
1590
1591 chipid = unitaddr - MC_AMD_DEV_OFFSET;
1592
1593 rw_enter(&mc_lock, RW_WRITER);
1594
1595 for (mc = mc_list; mc != NULL; mc = mc->mc_next) {
1596 if (mc->mc_props.mcp_num == chipid)
1597 break;
1598 }
1599
1600 /* Integrate this memory controller device into existing set */
1601 if (mc == NULL) {
1602 mc = mc_create(chipid, dip);
1603
1604 if (mc == NULL) {
1605 /*
1606 * We don't complain here because this is a legitimate
1607 * path for MP systems. On those machines, we'll attach
1608 * before all CPUs have been initialized, and thus the
1609 * chip verification in mc_create will fail. We'll be
1610 * reattached later for those CPUs.
1611 */
1612 rw_exit(&mc_lock);
1613 return (DDI_FAILURE);
1614 }
1615 } else {
1616 mc_snapshot_destroy(mc);
1617 }
1618
1619 /* Beyond this point, we're committed to creating this node */
1620
1621 mc_fm_init(dip);
1622
1623 ASSERT(mc->mc_funcs[func].mcf_devi == NULL);
1624 mc->mc_funcs[func].mcf_devi = dip;
1625 mc->mc_funcs[func].mcf_instance = ddi_get_instance(dip);
1626
1627 mc->mc_ref++;
1628
1629 /*
1630 * Add the common properties to this node, and then add any properties
1631 * that are specific to this node based upon its configuration space.
1632 */
1633 (void) ddi_prop_update_string(DDI_DEV_T_NONE,
1634 dip, "model", (char *)bm->bm_model);
1635
1636 (void) ddi_prop_update_int(DDI_DEV_T_NONE,
1637 dip, "chip-id", mc->mc_props.mcp_num);
1638
1639 if (bm->bm_mkprops != NULL &&
1640 mc_pcicfg_setup(mc, bm->bm_func, &cfghdl) == DDI_SUCCESS) {
1641 bm->bm_mkprops(cfghdl, mc);
1642 mc_pcicfg_teardown(cfghdl);
1643 }
1644
1645 /*
1646 * If this is the last node to be attached for this memory controller,
1647 * then create the minor node, enable scrubbers, and register with
1648 * cpu module(s) for this chip.
1649 */
1650 if (func == MC_FUNC_DEVIMAP) {
1651 mc_props_t *mcp = &mc->mc_props;
1652 int dram_present = 0;
1653
1654 if (ddi_create_minor_node(dip, "mc-amd", S_IFCHR,
1655 mcp->mcp_num, "ddi_mem_ctrl",
1656 0) != DDI_SUCCESS) {
1657 cmn_err(CE_WARN, "failed to create minor node for chip "
1658 "%d memory controller\n",
1659 (chipid_t)mcp->mcp_num);
1660 }
1661
1662 /*
1663 * Register the memory controller for every CPU of this chip.
1664 *
1665 * If there is memory present on this node and ECC is enabled
1666 * attempt to enable h/w memory scrubbers for this node.
1667 * If we are successful in enabling *any* hardware scrubbers,
1668 * disable the software memory scrubber.
1669 */
1670 cmi_hdl_walk(mc_attach_cb, (void *)mc, (void *)&mcp->mcp_num,
1671 NULL);
1672
1673 if (mcp->mcp_lim != mcp->mcp_base) {
1674 /*
1675 * This node may map non-dram memory alone, so we
1676 * must check for an enabled chip-select to be
1677 * sure there is dram present.
1678 */
1679 mc_cs_t *mccs;
1680
1681 for (mccs = mc->mc_cslist; mccs != NULL;
1682 mccs = mccs->mccs_next) {
1683 if (mccs->mccs_props.csp_csbe) {
1684 dram_present = 1;
1685 break;
1686 }
1687 }
1688 }
1689
1690 if (dram_present && !mc_ecc_enabled(mc)) {
1691 /*
1692 * On a single chip system there is no point in
1693 * scrubbing if there is no ECC on the single node.
1694 * On a multichip system, necessarily Opteron using
1695 * registered ECC-capable DIMMs, if there is memory
1696 * present on a node but no ECC there then we'll assume
1697 * ECC is disabled for all nodes and we will not enable
1698 * the scrubber and will also disable the software
1699 * memscrub thread.
1700 */
1701 rc = 1;
1702 } else if (!dram_present) {
1703 /* No memory on this node - others decide memscrub */
1704 rc = 0;
1705 } else {
1706 /*
1707 * There is memory on this node and ECC is enabled.
1708 * Call via the cpu module to enable memory scrubbing
1709 * on this node - we could call directly but then
1710 * we may overlap with a request to enable chip-cache
1711 * scrubbing.
1712 */
1713 rc = mc_scrubber_enable(mc);
1714 }
1715
1716 if (rc == CMI_SUCCESS && !mc_sw_scrub_disabled++)
1717 cmi_mc_sw_memscrub_disable();
1718
1719 mc_report_testfails(mc);
1720 }
1721
1722 /*
1723 * Update nvlist for as far as we have gotten in attach/init.
1724 */
1725 nvlist_free(mc->mc_nvl);
1726 mc->mc_nvl = mc_nvl_create(mc);
1727
1728 rw_exit(&mc_lock);
1729 return (DDI_SUCCESS);
1730 }
1731
1732 /*ARGSUSED*/
1733 static int
1734 mc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1735 {
1736 /*
1737 * See the comment about suspend in
1738 * mc_attach().
1739 */
1740 if (cmd == DDI_SUSPEND)
1741 return (DDI_SUCCESS);
1742 else
1743 return (DDI_FAILURE);
1744 }
1745
1746
1747 static struct dev_ops mc_ops = {
1748 DEVO_REV, /* devo_rev */
1749 0, /* devo_refcnt */
1750 mc_getinfo, /* devo_getinfo */
1751 nulldev, /* devo_identify */
1752 nulldev, /* devo_probe */
1753 mc_attach, /* devo_attach */
1754 mc_detach, /* devo_detach */
1755 nodev, /* devo_reset */
1756 &mc_cb_ops, /* devo_cb_ops */
1757 NULL, /* devo_bus_ops */
1758 NULL, /* devo_power */
1759 ddi_quiesce_not_needed, /* devo_quiesce */
1760 };
1761
1762 static struct modldrv modldrv = {
1763 &mod_driverops,
1764 "Memory Controller for AMD processors",
1765 &mc_ops
1766 };
1767
1768 static struct modlinkage modlinkage = {
1769 MODREV_1,
1770 (void *)&modldrv,
1771 NULL
1772 };
1773
1774 int
1775 _init(void)
1776 {
1777 /*
1778 * Refuse to load if there is no PCI config space support.
1779 */
1780 if (pci_getl_func == NULL)
1781 return (ENOTSUP);
1782
1783 rw_init(&mc_lock, NULL, RW_DRIVER, NULL);
1784 return (mod_install(&modlinkage));
1785 }
1786
1787 int
1788 _info(struct modinfo *modinfop)
1789 {
1790 return (mod_info(&modlinkage, modinfop));
1791 }
1792
1793 int
1794 _fini(void)
1795 {
1796 int rc;
1797
1798 if ((rc = mod_remove(&modlinkage)) != 0)
1799 return (rc);
1800
1801 rw_destroy(&mc_lock);
1802 return (0);
1803 }
1804