1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 #include <strings.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <fm/fmd_api.h>
32 #include <sys/fm/protocol.h>
33 #include <sys/async.h>
34 #include <sys/time.h>
35 #include <cmd.h>
36 #include <cmd_state.h>
37 #include <cmd_mem.h>
38 #include <cmd_dp.h>
39 #include <cmd_dp_page.h>
40 #include <libnvpair.h>
41 #include <fcntl.h>
42 #include <unistd.h>
43 #include <sys/mem.h>
44 #include <sys/plat_datapath.h>
45
46 /*ARGSUSED*/
47 static nvlist_t *
dp_cpu_fmri(fmd_hdl_t * hdl,uint32_t cpuid,uint64_t serial_id)48 dp_cpu_fmri(fmd_hdl_t *hdl, uint32_t cpuid, uint64_t serial_id)
49 {
50 nvlist_t *nvl = NULL;
51 int err;
52 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
53
54 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
55 return (NULL);
56
57 err = nvlist_add_string(nvl, FM_FMRI_SCHEME, FM_FMRI_SCHEME_CPU);
58 err |= nvlist_add_uint8(nvl, FM_VERSION, FM_CPU_SCHEME_VERSION);
59 err |= nvlist_add_uint32(nvl, FM_FMRI_CPU_ID, cpuid);
60
61 /*
62 * Version 1 calls for a string-based serial number
63 */
64 (void) snprintf(sbuf, sizeof (sbuf), "%llX", (u_longlong_t)serial_id);
65 err |= nvlist_add_string(nvl, FM_FMRI_CPU_SERIAL_ID, sbuf);
66 if (err != 0) {
67 nvlist_free(nvl);
68 return (NULL);
69 }
70 return (nvl);
71 }
72
73 cmd_dp_t *
cmd_dp_lookup_fault(fmd_hdl_t * hdl,uint32_t cpuid)74 cmd_dp_lookup_fault(fmd_hdl_t *hdl, uint32_t cpuid)
75 {
76 cmd_dp_t *ptr;
77 int i, found = 0;
78
79 /*
80 * Scan the cmd.cmd_datapaths list to see if there is
81 * a fault event present that impacts 'cpuid'
82 */
83 for (ptr = cmd_list_next(&cmd.cmd_datapaths); ptr != NULL;
84 ptr = cmd_list_next(ptr)) {
85 if (ptr->dp_erpt_type == DP_FAULT) {
86 for (i = 0; i < ptr->dp_ncpus; i++) {
87 if (ptr->dp_cpuid_list[i] == cpuid) {
88 found = 1;
89 break;
90 }
91 }
92 }
93 if (found)
94 break;
95 }
96
97 /*
98 * Check if the FMRI for the found cpuid exists in the domain.
99 * If it does not, it implies a DR has been done and this DP_FAULT
100 * is no longer needed.
101 */
102 if (ptr != NULL) {
103 nvlist_t *nvl;
104
105 nvl = dp_cpu_fmri(hdl, ptr->dp_cpuid_list[i],
106 ptr->dp_serid_list[i]);
107
108 if (nvl != NULL) {
109 if (!fmd_nvl_fmri_present(hdl, nvl)) {
110 cmd_dp_destroy(hdl, ptr);
111 ptr = NULL;
112 }
113 nvlist_free(nvl);
114 }
115 }
116 return (ptr);
117 }
118
119 cmd_dp_t *
cmd_dp_lookup_error(cmd_dp_t * dp)120 cmd_dp_lookup_error(cmd_dp_t *dp)
121 {
122 cmd_dp_t *ptr;
123
124 /*
125 * Scan the cmd.cmd_datapaths list to see if there is
126 * an existing error that matches 'dp'. A match is if
127 * both dp_err and the base cpuid are identical
128 */
129 for (ptr = cmd_list_next(&cmd.cmd_datapaths); ptr != NULL;
130 ptr = cmd_list_next(ptr)) {
131 if (ptr->dp_erpt_type == DP_ERROR) {
132 if ((ptr->dp_err == dp->dp_err) &&
133 (ptr->dp_cpuid_list[0] == dp->dp_cpuid_list[0]))
134 return (ptr);
135 }
136 }
137 return (NULL);
138 }
139
140 /*
141 * Allocates an nvlist_t, and sets ASRU information according to
142 * the cmd_dp_t provided.
143 */
144 /*ARGSUSED*/
145 nvlist_t *
cmd_dp_setasru(fmd_hdl_t * hdl,cmd_dp_t * dpt)146 cmd_dp_setasru(fmd_hdl_t *hdl, cmd_dp_t *dpt)
147 {
148 nvlist_t *asru, *hcelem[DP_MAX_ASRUS];
149 int i, j, sz, err;
150 char buf[DP_MAX_BUF];
151
152 sz = dpt->dp_ncpus;
153
154 /* put ASRUs in an nvlist */
155 for (i = 0; i < sz; i++) {
156 (void) snprintf(buf, DP_MAX_BUF, "%d", dpt->dp_cpuid_list[i]);
157 if (nvlist_alloc(&hcelem[i], NV_UNIQUE_NAME, 0) != 0)
158 return (NULL);
159
160 err = nvlist_add_string(hcelem[i], FM_FMRI_HC_NAME,
161 FM_FMRI_CPU_ID);
162 err |= nvlist_add_string(hcelem[i], FM_FMRI_HC_ID, buf);
163 if (err != 0) {
164 for (j = 0; j < i + 1; j++)
165 nvlist_free(hcelem[j]);
166 return (NULL);
167 }
168 }
169
170 /* put it in an HC scheme */
171 if (nvlist_alloc(&asru, NV_UNIQUE_NAME, 0) != 0) {
172 for (j = 0; j < sz; j++)
173 nvlist_free(hcelem[j]);
174 return (NULL);
175 }
176 err = nvlist_add_uint8(asru, FM_VERSION, FM_HC_SCHEME_VERSION);
177 err |= nvlist_add_string(asru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
178 err |= nvlist_add_string(asru, FM_FMRI_HC_ROOT, "");
179 err |= nvlist_add_uint32(asru, FM_FMRI_HC_LIST_SZ, sz);
180 err |= nvlist_add_nvlist_array(asru, FM_FMRI_HC_LIST, &hcelem[0],
181 dpt->dp_ncpus);
182 if (err != 0) {
183 for (j = 0; j < sz; j++)
184 nvlist_free(hcelem[j]);
185 nvlist_free(asru);
186 return (NULL);
187 }
188
189 /* free up memory */
190 for (j = 0; j < sz; j++)
191 nvlist_free(hcelem[j]);
192
193 /* return the ASRU */
194 return (asru);
195 }
196
197 void
dp_buf_write(fmd_hdl_t * hdl,cmd_dp_t * dp)198 dp_buf_write(fmd_hdl_t *hdl, cmd_dp_t *dp)
199 {
200 size_t sz;
201
202 if ((sz = fmd_buf_size(hdl, NULL, dp->dp_bufname)) != 0 &&
203 sz != sizeof (cmd_dp_pers_t))
204 fmd_buf_destroy(hdl, NULL, dp->dp_bufname);
205
206 fmd_buf_write(hdl, NULL, dp->dp_bufname, &dp->dp_pers,
207 sizeof (cmd_dp_pers_t));
208 }
209
210 static cmd_dp_t *
dp_wrapv0(fmd_hdl_t * hdl,cmd_dp_pers_t * pers,size_t psz)211 dp_wrapv0(fmd_hdl_t *hdl, cmd_dp_pers_t *pers, size_t psz)
212 {
213 cmd_dp_t *dp;
214
215 if (psz != sizeof (cmd_dp_pers_t)) {
216 fmd_hdl_abort(hdl, "size of state doesn't match size of "
217 "version 1 state (%u bytes).\n", sizeof (cmd_dp_pers_t));
218 }
219
220 dp = fmd_hdl_zalloc(hdl, sizeof (cmd_dp_t), FMD_SLEEP);
221 bcopy(pers, dp, sizeof (cmd_dp_pers_t));
222 fmd_hdl_free(hdl, pers, psz);
223 return (dp);
224 }
225
226 void *
cmd_dp_restore(fmd_hdl_t * hdl,fmd_case_t * cp,cmd_case_ptr_t * ptr)227 cmd_dp_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
228 {
229 cmd_dp_t *dp;
230
231 for (dp = cmd_list_next(&cmd.cmd_datapaths); dp != NULL;
232 dp = cmd_list_next(dp)) {
233 if (dp->dp_case == cp)
234 break;
235 }
236
237 if (dp == NULL) {
238 size_t dpsz;
239
240 fmd_hdl_debug(hdl, "restoring dp from %s\n", ptr->ptr_name);
241
242 if ((dpsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
243 if (fmd_case_solved(hdl, cp) ||
244 fmd_case_closed(hdl, cp)) {
245 fmd_hdl_debug(hdl, "dp %s from case %s not "
246 "found. Case is already solved or closed\n",
247 ptr->ptr_name, fmd_case_uuid(hdl, cp));
248 return (NULL);
249 } else {
250 fmd_hdl_abort(hdl, "dp referenced by case %s "
251 "does not exist in saved state\n",
252 fmd_case_uuid(hdl, cp));
253 }
254 } else if (dpsz > CMD_DP_MAXSIZE ||
255 dpsz < CMD_DP_MINSIZE) {
256 fmd_hdl_abort(hdl, "dp buffer referenced by "
257 "case %s is out of bounds (is %u bytes, "
258 "max %u, min %u)\n", fmd_case_uuid(hdl, cp),
259 dpsz, CMD_DP_MAXSIZE, CMD_DP_MINSIZE);
260 }
261
262 if ((dp = cmd_buf_read(hdl, NULL, ptr->ptr_name, dpsz)) == NULL)
263 fmd_hdl_abort(hdl, "failed to read dp buf %s",
264 ptr->ptr_name);
265
266 switch (dp->dp_version) {
267 case CMD_DP_VERSION_0:
268 dp = dp_wrapv0(hdl, (cmd_dp_pers_t *)dp, dpsz);
269 break;
270 default:
271 fmd_hdl_abort(hdl, "unknown version (found %d) "
272 "for dp state referenced by case %s.\n",
273 dp->dp_version, fmd_case_uuid(hdl, cp));
274 break;
275 }
276
277 dp->dp_case = cp;
278
279 if (dp->dp_erpt_type == DP_ERROR) {
280 fmd_event_t *ep = fmd_case_getprincipal(hdl, cp);
281
282 ++cmd.cmd_dp_flag;
283
284 dp->dp_id = fmd_timer_install(hdl,
285 (void *)CMD_TIMERTYPE_DP, ep,
286 (hrtime_t)NANOSEC * (dp->dp_t_value + 120));
287 }
288
289 cmd_list_append(&cmd.cmd_datapaths, dp);
290 }
291
292 return (dp);
293 }
294
295 void
cmd_dp_close(fmd_hdl_t * hdl,void * arg)296 cmd_dp_close(fmd_hdl_t *hdl, void *arg)
297 {
298 cmd_dp_destroy(hdl, arg);
299 }
300
301 void
cmd_dp_timeout(fmd_hdl_t * hdl,id_t id)302 cmd_dp_timeout(fmd_hdl_t *hdl, id_t id)
303 {
304 cmd_dp_t *dp;
305
306 /* close case associated with the timer */
307 for (dp = cmd_list_next(&cmd.cmd_datapaths); dp != NULL;
308 dp = cmd_list_next(dp)) {
309 if (dp->dp_id == id) {
310 cmd_dp_destroy(hdl, dp);
311 break;
312 }
313 }
314
315 fmd_hdl_debug(hdl, "cmd_dp_timeout() complete\n");
316 }
317
318 /*
319 * Validate by matching each cmd_dp_t cpu and serial id to what is
320 * installed and active on this machine or domain. Delete the cmd_dp_t
321 * if no match is made.
322 */
323 void
cmd_dp_validate(fmd_hdl_t * hdl)324 cmd_dp_validate(fmd_hdl_t *hdl)
325 {
326 cmd_dp_t *dp, *next;
327 nvlist_t *nvl;
328 int i, no_match;
329
330 for (dp = cmd_list_next(&cmd.cmd_datapaths); dp != NULL; dp = next) {
331 next = cmd_list_next(dp);
332
333 for (i = 0, no_match = 0; i < dp->dp_ncpus; i++) {
334 nvl = dp_cpu_fmri(hdl, dp->dp_cpuid_list[i],
335 dp->dp_serid_list[i]);
336
337 if (nvl == NULL)
338 fmd_hdl_abort(hdl, "could not make CPU fmri");
339
340 if (!fmd_nvl_fmri_present(hdl, nvl))
341 no_match = 1;
342
343 nvlist_free(nvl);
344
345 if (no_match) {
346 cmd_dp_destroy(hdl, dp);
347 break;
348 }
349 }
350 }
351 }
352
353 static void
cmd_dp_free(fmd_hdl_t * hdl,cmd_dp_t * dp,int destroy)354 cmd_dp_free(fmd_hdl_t *hdl, cmd_dp_t *dp, int destroy)
355 {
356 if (dp->dp_case != NULL)
357 cmd_case_fini(hdl, dp->dp_case, destroy);
358
359 if (destroy && dp->dp_erpt_type == DP_ERROR) {
360 --cmd.cmd_dp_flag;
361 /*
362 * If there are no active datapath events, replay any
363 * pages that were deferred.
364 */
365 if (cmd.cmd_dp_flag == 0)
366 cmd_dp_page_replay(hdl);
367 }
368
369 if (destroy)
370 fmd_buf_destroy(hdl, NULL, dp->dp_bufname);
371
372 cmd_list_delete(&cmd.cmd_datapaths, dp);
373 fmd_hdl_free(hdl, dp, sizeof (cmd_dp_t));
374 }
375
376 void
cmd_dp_destroy(fmd_hdl_t * hdl,cmd_dp_t * dp)377 cmd_dp_destroy(fmd_hdl_t *hdl, cmd_dp_t *dp)
378 {
379 cmd_dp_free(hdl, dp, FMD_B_TRUE);
380 }
381
382 /*ARGSUSED*/
383 int
cmd_dp_error(fmd_hdl_t * hdl)384 cmd_dp_error(fmd_hdl_t *hdl)
385 {
386 if (cmd.cmd_dp_flag)
387 return (1);
388 else
389 return (0);
390 }
391
392 int
cmd_dp_get_mcid(uint64_t addr,int * mcid)393 cmd_dp_get_mcid(uint64_t addr, int *mcid)
394 {
395 int fd, rc;
396 mem_info_t data;
397
398 if ((fd = open("/dev/mem", O_RDONLY)) < 0)
399 return (-1);
400
401 data.m_addr = addr;
402 data.m_synd = 0;
403 if ((rc = ioctl(fd, MEM_INFO, &data)) < 0) {
404 (void) close(fd);
405 return (rc);
406 }
407
408 (void) close(fd);
409 *mcid = data.m_mcid;
410
411 return (0);
412 }
413
414 /*ARGSUSED*/
415 int
cmd_dp_fault(fmd_hdl_t * hdl,uint64_t addr)416 cmd_dp_fault(fmd_hdl_t *hdl, uint64_t addr)
417 {
418 int mcid;
419
420 if (cmd_dp_get_mcid(addr, &mcid) < 0)
421 fmd_hdl_abort(hdl, "cmd_dp_get_mcid failed");
422
423 if (cmd_dp_lookup_fault(hdl, mcid) != NULL)
424 return (1);
425 else
426 return (0);
427 }
428
429 void
cmd_dp_fini(fmd_hdl_t * hdl)430 cmd_dp_fini(fmd_hdl_t *hdl)
431 {
432 cmd_dp_t *dp;
433 cmd_dp_defer_t *dpage;
434
435 while ((dp = cmd_list_next(&cmd.cmd_datapaths)) != NULL)
436 cmd_dp_free(hdl, dp, FMD_B_FALSE);
437
438 while ((dpage = cmd_list_next(&cmd.cmd_deferred_pages)) != NULL) {
439 cmd_list_delete(&cmd.cmd_deferred_pages, dpage);
440 fmd_hdl_free(hdl, dpage, sizeof (cmd_dp_defer_t));
441 }
442 }
443