/*
 * xref: /titanic_44/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_dimm.h
 * (revision d4ac42a1cd3016618a9ba0330862d410f0058f89)
 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

25 #ifndef _CMD_DIMM_H
26 #define	_CMD_DIMM_H
27 
/*
 * Memory modules are described by the cmd_dimm general-purpose state structure.
 * Whereas banks are primarily used to track UEs, this structure is used to
 * track CEs, which can be associated with individual modules.  Each memory
 * module is part of a bank, and will have a link to the bank if the bank is
 * known to the diagnosis engine.  Banks will be known if UEs have occurred.
 *
 * Data structures:
 *
 *     ,--------.       ,--------.
 *     |dimm    | <---- |case_ptr| (CMD_PTR_DIMM_CASE)
 *     |        |       `--------'
 *     |,-------|       ,-------------.
 *  ,->||asru_t | ----> |packed nvlist|
 *  |  |`-------|       `-------------'
 *  `--| unum   |
 *     | bank   | ----> bank buffer
 *     `--------'
 *
 * Data structure	P?  Case? Notes
 * ----------------	--- ----- ----------------------------------------------
 * cmd_dimm_t		Yes No    Name is derived from the unum ("dimm_%s")
 * cmd_case_ptr_t	Yes Yes   Name is case's UUID
 * dimm_asru		Yes No    Name is derived from the unum ("dimm_asru_%d")
 * dimm_unum		No  No    Pointer into ASRU - relinked during restore
 * dimm_bank		No  No    Recreated during restore
 */

56 #include <cmd_mem.h>
57 #include <values.h>
58 
59 #ifdef __cplusplus
60 extern "C" {
61 #endif
62 
/*
 * CMD_MAX_CKWDS denotes the highest number, across all covered
 * SPARC architectures, of checkwords per cache line.  It is the length of
 * the mq_root[] array of per-checkword lists in struct cmd_dimm.
 */

#define	CMD_MAX_CKWDS	4

/*
 * The DIMM structure started life without a version number.  Making things more
 * complicated, the version number in the new struct occupies the space used for
 * the case pointer in the non-versioned struct.  We therefore have to use
 * somewhat unorthodox version numbers so as to allow us to easily tell the
 * difference between a version number and a case pointer.  Case pointers will
 * be zero or (this being SPARC) a value with the bottom two bits clear.  Our
 * version numbers will begin with 0x11, and will increase by 0x10 each time.
 */

/*
 * Turn a small ordinal into a version stamp: the ordinal is shifted above the
 * low nibble and bit 0 is set, so every stamp is odd and can never be mistaken
 * for a case pointer.
 */
#define	DIMM_MKVERSION(version)	(((version) << 4) | 1)

#define	CMD_DIMM_VERSION_1	DIMM_MKVERSION(1)	/* 0x11 (17) */
#define	CMD_DIMM_VERSION_2	DIMM_MKVERSION(2)	/* 0x21 (33) */
#define	CMD_DIMM_VERSION	CMD_DIMM_VERSION_2	/* latest defined */

/*
 * Nonzero when bit 0 of the dimm's version word is set, i.e. when the word
 * holds a DIMM_MKVERSION() stamp rather than an old-style case pointer.
 */
#define	CMD_DIMM_VERSIONED(dimm)	((dimm)->dimm_version & 1)

/*
 * NOTE(review): presumably prepended to the names of per-DIMM statistics
 * (see dimm_retstat in struct cmd_dimm) -- confirm at the use sites.
 */
#define	CMD_DIMM_STAT_PREFIX		"d"	/* d = dimm */

90 typedef struct cmd_dimm_0 {
91 	cmd_header_t dimm0_header;	/* Nodetype must be CMD_NT_DIMM */
92 	fmd_case_t *dimm0_case;		/* Open CE case against this DIMM */
93 	cmd_fmri_t dimm0_asru;		/* ASRU for this DIMM */
94 	const char *dimm0_unum;		/* This DIMM's name */
95 	uint_t dimm0_wrnthresh;		/* # of pages retired before warning */
96 	uint_t dimm0_nretired;		/* # ret'd pages for CEs in DIMM */
97 	cmd_bank_t *dimm0_bank;		/* This DIMM's bank (if discovered) */
98 } cmd_dimm_0_t;
99 
100 typedef struct cmd_dimm_1 {
101 	cmd_header_t dimm1_header;	/* Nodetype must be CMD_NT_DIMM */
102 	uint_t dimm1_version;		/* DIMM version */
103 	cmd_fmri_t dimm1_asru;		/* ASRU for this DIMM */
104 	uint_t dimm1_flags;		/* CMD_MEM_F_* */
105 	uint_t dimm1_nretired;		/* # ret'd pages for CEs in DIMM */
106 } cmd_dimm_1_t;
107 
108 
109 typedef struct cmd_dimm_pers {
110 	cmd_header_t dimmp_header;	/* Nodetype must be CMD_NT_DIMM */
111 	uint_t dimmp_version;
112 	cmd_fmri_t dimmp_asru;		/* ASRU for this DIMM */
113 	uint_t dimmp_flags;		/* CMD_MEM_F_* */
114 	uint_t dimmp_nretired;		/* # ret'd pages for CEs in DIMM */
115 	uint64_t dimmp_phys_addr_low;	/* retired pages low addr */
116 	uint64_t dimmp_phys_addr_hi;	/* retired pages hi addr */
117 } cmd_dimm_pers_t;
118 
119 /*
120  * Index block for MQSC rules 4A and 4B correlation of memory CEs
121  * on a single DIMM. "Unit Position" refers to bit or nibble depending
122  * on the memory ECC.  This structure is not persisted.
123  */
124 
125 typedef struct cmd_mq {
126 	cmd_list_t mq_l;		/* pointers to prev and next */
127 	uint64_t mq_tstamp;		/* timestamp of ereport in secs */
128 	uint16_t mq_ckwd;		/* phys addr mod 64 */
129 	uint64_t mq_phys_addr;		/* from ereport */
130 	uint16_t mq_unit_position;	/* bit for sun4u, nibble for sun4v */
131 	fmd_event_t *mq_ep;		/* ereport - for potential fault */
132 	char *mq_serdnm;		/* serd eng to retain CE events */
133 	uint16_t mq_dupce_count;	/* dup CEs */
134 	cmd_list_t mq_dupce_tstamp;	/* list of dup CEs time stamp */
135 	uint32_t mq_cpuid;		/* ereport detector */
136 } cmd_mq_t;
137 
138 typedef struct tstamp {
139 	cmd_list_t ts_l;
140 	uint64_t tstamp;
141 } tstamp_t;
142 
143 struct cmd_dimm {
144 	cmd_dimm_pers_t dimm_pers;
145 	cmd_bank_t *dimm_bank;		/* This DIMM's bank (if discovered) */
146 	const char *dimm_unum;		/* This DIMM's name */
147 	cmd_case_t dimm_case;		/* Open CE case against this DIMM */
148 	fmd_stat_t dimm_retstat;	/* retirement statistics, this DIMM */
149 	uint16_t dimm_syl_error;	/* bad r/w symbol-in-error */
150 	cmd_list_t
151 	    mq_root[CMD_MAX_CKWDS];	/* per-checkword CEs to correlate */
152 };
153 
/*
 * Tunables for the CE correlation done with cmd_mq_t entries.
 * NOTE(review): CMD_MQ_SERDT and CMD_MQ_SERDN look like the T and N
 * parameters of the SERD engine named by mq_serdnm -- confirm at the
 * use sites.
 */
#define	CMD_MQ_TIMELIM	(72*60*60)	/* 72 hours, in seconds */
#define	CMD_MQ_SERDT	MAXINT		/* Never expected to fire */
#define	CMD_MQ_SERDN	2		/* Dup CEs not allowed */
#define	CMD_MQ_512KB	0x80000		/* space between low & hi retired */
					/* page addresses */
#define	CMD_PAGE_RATIO	0.0625		/* bad r/w page ratio (1/16) */

/*
 * Largest and smallest size among the three DIMM layouts declared in this
 * file (unversioned, version 1, and cmd_dimm_pers_t).  MAX() and MIN() are
 * not defined here and must come from an included header.
 * NOTE(review): presumably used to bound the size of a stored DIMM buffer
 * before deciding which layout it holds -- confirm in cmd_dimm.c.
 */
#define	CMD_DIMM_MAXSIZE \
	MAX(sizeof (cmd_dimm_0_t), \
	MAX(sizeof (cmd_dimm_1_t), sizeof (cmd_dimm_pers_t)))
#define	CMD_DIMM_MINSIZE \
	MIN(sizeof (cmd_dimm_0_t), \
	MIN(sizeof (cmd_dimm_1_t), sizeof (cmd_dimm_pers_t)))

/*
 * Shorthand member names: written after "dimm->" (or "dimm."), each expands
 * to the matching field inside the embedded dimm_pers, so callers need not
 * spell out the dimm_pers.dimmp_* path.
 */
#define	dimm_header		dimm_pers.dimmp_header
#define	dimm_nodetype		dimm_pers.dimmp_header.hdr_nodetype
#define	dimm_bufname		dimm_pers.dimmp_header.hdr_bufname
#define	dimm_version		dimm_pers.dimmp_version
#define	dimm_asru		dimm_pers.dimmp_asru
#define	dimm_asru_nvl		dimm_pers.dimmp_asru.fmri_nvl
#define	dimm_flags		dimm_pers.dimmp_flags
#define	dimm_nretired		dimm_pers.dimmp_nretired
#define	dimm_phys_addr_hi	dimm_pers.dimmp_phys_addr_hi
#define	dimm_phys_addr_low	dimm_pers.dimmp_phys_addr_low

179 extern cmd_dimm_t *cmd_dimm_lookup(fmd_hdl_t *, nvlist_t *);
180 extern cmd_dimm_t *cmd_dimm_create(fmd_hdl_t *, nvlist_t *);
181 
182 extern nvlist_t *cmd_dimm_fru(cmd_dimm_t *);
183 extern nvlist_t *cmd_dimm_create_fault(fmd_hdl_t *, cmd_dimm_t *, const char *,
184     uint_t);
185 #ifdef sun4v
186 extern nvlist_t *cmd_mem2hc(fmd_hdl_t *, nvlist_t *);
187 #endif /* sun4v */
188 
189 extern nvlist_t *cmd_dimm_fmri_derive(fmd_hdl_t *, uint64_t, uint16_t,
190     uint64_t);
191 extern int cmd_dimm_thresh_reached(fmd_hdl_t *, cmd_dimm_t *, uint64_t,
192     uint16_t);
193 
194 extern void cmd_dimm_dirty(fmd_hdl_t *, cmd_dimm_t *);
195 extern void *cmd_dimm_restore(fmd_hdl_t *, fmd_case_t *, cmd_case_ptr_t *);
196 extern void cmd_dimm_destroy(fmd_hdl_t *, cmd_dimm_t *);
197 extern void cmd_dimm_validate(fmd_hdl_t *);
198 extern void cmd_dimm_gc(fmd_hdl_t *);
199 extern void cmd_dimm_fini(fmd_hdl_t *);
200 
201 extern void cmd_dimmlist_free(fmd_hdl_t *);
202 extern void cmd_dimm_save_symbol_error(cmd_dimm_t *, uint16_t);
203 extern int cmd_dimm_check_symbol_error(cmd_dimm_t *, uint16_t);
204 
205 #ifdef __cplusplus
206 }
207 #endif
208 
209 #endif /* _CMD_DIMM_H */
210