/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Kernel Error Queues
 *
 * A common problem when handling hardware error traps and interrupts is that
 * these errors frequently must be handled at high interrupt level, where
 * reliably producing error messages and safely examining and manipulating
 * other kernel state may not be possible. The kernel error queue primitive is
 * a common set of routines that allow a subsystem to maintain a queue of
 * errors that can be processed by an explicit call from a safe context or by a
 * soft interrupt that fires at a specific lower interrupt level. The queue
 * management code also ensures that if the system panics, all in-transit
 * errors are logged prior to reset. Each queue has an associated kstat for
 * observing the number of errors dispatched and logged, and mdb(1) debugging
 * support is provided for live and post-mortem observability.
 *
 * Memory Allocation
 *
 *	All of the queue data structures are allocated in advance as part of
 *	the errorq_create() call. No additional memory allocations are
 *	performed as part of errorq_dispatch(), errorq_reserve(),
 *	errorq_commit() or errorq_drain(). This design facilitates reliable
 *	error queue processing even when the system is low on memory, and
 *	ensures that errorq_dispatch() can be called from any context. When
 *	the queue is created, the maximum queue length is specified as a
 *	parameter to errorq_create() or errorq_nvcreate(). This length should
 *	represent a reasonable upper bound on the number of simultaneous
 *	errors. If errorq_dispatch() or errorq_reserve() is invoked and no
 *	free queue elements are available, the error is dropped and will not
 *	be logged. Typically, the queue will only be exhausted by an error
 *	storm, and in this case the earlier errors provide the most important
 *	data for analysis. When a new error is dispatched, the error data is
 *	copied into the preallocated queue element so that the caller's
 *	buffer can be reused.
 *
 *	When a new error is reserved, an element is moved from the free list
 *	and returned to the caller. The element buffer data, eqe_data, may be
 *	managed by the caller and dispatched to the errorq by calling
 *	errorq_commit(). This is useful for additions to errorq's created
 *	with errorq_nvcreate() to handle name-value pair (nvpair) data. See
 *	below for a discussion on nvlist errorq's.
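 *
 *	For example, a caller that wants to build its error record directly
 *	in the preallocated element buffer, rather than copy it in via
 *	errorq_dispatch(), might use the reserve/commit pair roughly as
 *	follows (a sketch only; my_errq and my_build_record() are
 *	hypothetical and not defined in this file):
 *
 *	    errorq_elem_t *eqep;
 *
 *	    if ((eqep = errorq_reserve(my_errq)) != NULL) {
 *		    my_build_record(eqep->eqe_data);
 *		    errorq_commit(my_errq, eqep, ERRORQ_ASYNC);
 *	    }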
 *
 * Queue Drain Callback
 *
 *	When the error queue is drained, the caller's queue drain callback is
 *	invoked with a pointer to the saved error data. This function may be
 *	called from passive kernel context or soft interrupt context at or
 *	below LOCK_LEVEL, or as part of panic(). As such, the callback should
 *	basically only be calling cmn_err (but NOT with the CE_PANIC flag).
 *	The callback must not call panic(), attempt to allocate memory, or
 *	wait on a condition variable. The callback may not call
 *	errorq_destroy() or errorq_drain() on the same error queue that
 *	called it.
 *
 *	The queue drain callback will always be called for each pending error
 *	in the order in which errors were enqueued (oldest to newest). The
 *	queue drain callback is guaranteed to provide at *least* once
 *	semantics for all errors that are successfully dispatched (i.e. for
 *	which errorq_dispatch() has successfully completed). If an unrelated
 *	panic occurs while the queue drain callback is running on a vital
 *	queue, the panic subsystem will continue the queue drain and the
 *	callback may be invoked again for the same error. Therefore, the
 *	callback should restrict itself to logging messages and taking other
 *	actions that are not destructive if repeated.
 *
 * Name-Value Pair Error Queues
 *
 *	During error handling, it may be more convenient to store error
 *	queue element data as a fixed buffer of name-value pairs. The
 *	nvpair library allows construction and destruction of nvlists in
 *	pre-allocated memory buffers.
 *
 *	Error queues created via errorq_nvcreate() store queue element
 *	data as fixed buffer nvlists (ereports). errorq_reserve()
 *	allocates an errorq element from eqp->eq_free and returns a valid
 *	pointer to an errorq_elem_t (queue element) and a pre-allocated
 *	fixed buffer nvlist. errorq_elem_nvl() is used to gain access
 *	to the nvlist to add name-value ereport members prior to
 *	dispatching the error queue element in errorq_commit().
 *
 *	Once dispatched, the drain function will return the element to
 *	eqp->eq_free and reset the associated nv_alloc structure.
 *	errorq_cancel() may be called to cancel an element reservation
 *	that was never dispatched (committed). This is useful in cases
 *	where a programming error prevents a queue element from being
 *	dispatched.
 *
 * Queue Management
 *
 *	The queue element structures and error data buffers are allocated in
 *	two contiguous chunks as part of errorq_create() or errorq_nvcreate().
 *	Each queue element structure contains a next pointer, a previous
 *	pointer, and a pointer to the corresponding error data buffer. The
 *	data buffer for an nvlist errorq is a shared buffer for the
 *	allocation of name-value pair lists. The elements are kept on one of
 *	three lists:
 *
 *	Unused elements are kept on the free list, a singly-linked list
 *	pointed to by eqp->eq_free, and linked together using eqe_prev. The
 *	eqe_next pointer is not used by the free list and will be set to
 *	NULL.
 *
 *	Pending errors are kept on the pending list, a singly-linked list
 *	pointed to by eqp->eq_pend, and linked together using eqe_prev. This
 *	list is maintained in order from newest error to oldest. The eqe_next
 *	pointer is not used by the pending list and will be set to NULL.
 *
 *	The processing list is a doubly-linked list pointed to by
 *	eqp->eq_phead (the oldest element) and eqp->eq_ptail (the newest
 *	element). The eqe_next pointer is used to traverse from eq_phead to
 *	eq_ptail, and the eqe_prev pointer is used to traverse from eq_ptail
 *	to eq_phead. Once a queue drain operation begins, the current pending
 *	list is moved to the processing list in a two-phase commit fashion,
 *	allowing the panic code to always locate and process all pending
 *	errors in the event that a panic occurs in the middle of queue
 *	processing.
 *
 *	A fourth list is maintained for nvlist errorqs. The dump list,
 *	eq_dump, is used to link all errorq elements that should be stored
 *	in a crash dump file in the event of a system panic. During
 *	errorq_panic(), the list is created and subsequently traversed
 *	in errorq_dump() during the final phases of a crash dump.
 *
 * Platform Considerations
 *
 *	In order to simplify their implementation, error queues make use of
 *	the C wrappers for compare-and-swap. If the platform itself does not
 *	support compare-and-swap in hardware and the kernel emulation routines
 *	are used instead, then the context in which errorq_dispatch() can be
 *	safely invoked is further constrained by the implementation of the
 *	compare-and-swap emulation. Specifically, if errorq_dispatch() is
 *	called from a code path that can be executed above ATOMIC_LEVEL on
 *	such a platform, the dispatch code could potentially deadlock unless
 *	the corresponding error interrupt is blocked or disabled prior to
 *	calling errorq_dispatch(). Error queues should therefore be deployed
 *	with caution on these platforms.
 *
 * Interfaces
 *
 * errorq_t *errorq_create(name, func, private, qlen, eltsize, ipl, flags);
 * errorq_t *errorq_nvcreate(name, func, private, qlen, eltsize, ipl, flags);
 *
 *	Create a new error queue with the specified name, callback, and
 *	properties. A pointer to the new error queue is returned upon
 *	success, or NULL is returned to indicate that the queue could not be
 *	created. This function must be called from passive kernel context
 *	with no locks held that can prevent a sleeping memory allocation from
 *	occurring. errorq_create() will return failure if the queue kstats
 *	cannot be created, or if a soft interrupt handler cannot be
 *	registered.
 *
 *	The queue 'name' is a string that is recorded for live and
 *	post-mortem examination by a debugger. The queue callback 'func' will
 *	be invoked for each error drained from the queue, and will receive
 *	the 'private' pointer as its first argument. The callback must obey
 *	the rules for callbacks described above. The queue will have maximum
 *	length 'qlen' and each element will be able to record up to 'eltsize'
 *	bytes of data. The queue's soft interrupt (see errorq_dispatch(),
 *	below) will fire at 'ipl', which should not exceed LOCK_LEVEL. The
 *	queue 'flags' may include the following flag:
 *
 *	ERRORQ_VITAL - This queue contains information that is considered
 *	    vital to problem diagnosis. Error queues that are marked vital
 *	    will be automatically drained by the panic subsystem prior to
 *	    printing the panic messages to the console.
 *
 * void errorq_destroy(errorq);
 *
 *	Destroy the specified error queue.
 *	The queue is drained of any pending elements and these are logged
 *	before errorq_destroy() returns. Once errorq_destroy() begins
 *	draining the queue, any simultaneous calls to dispatch errors will
 *	result in the errors being dropped. The caller must invoke a
 *	higher-level abstraction (e.g. disabling an error interrupt) to
 *	ensure that error handling code does not attempt to dispatch errors
 *	to the queue while it is being freed.
 *
 * void errorq_dispatch(errorq, data, len, flag);
 *
 *	Attempt to enqueue the specified error data. If a free queue element
 *	is available, the data is copied into a free element and placed on a
 *	pending list. If no free queue element is available, the error is
 *	dropped. The data length (len) is specified in bytes and should not
 *	exceed the queue's maximum element size. If the data length is less
 *	than the maximum element size, the remainder of the queue element is
 *	filled with zeroes. The flag parameter should be one of:
 *
 *	ERRORQ_ASYNC - Schedule a soft interrupt at the previously specified
 *	    IPL to asynchronously drain the queue on behalf of the caller.
 *
 *	ERRORQ_SYNC - Do not schedule a soft interrupt to drain the queue.
 *	    The caller is presumed to be calling errorq_drain() or panic() in
 *	    the near future in order to drain the queue and log the error.
 *
 *	The errorq_dispatch() function may be called from any context,
 *	subject to the Platform Considerations described above.
 *
 * void errorq_drain(errorq);
 *
 *	Drain the error queue of all pending errors. The queue's callback
 *	function is invoked for each error in order from oldest to newest.
 *	This function may be used at or below LOCK_LEVEL or from panic
 *	context.
 *
 * errorq_elem_t *errorq_reserve(errorq);
 *
 *	Reserve an error queue element for later processing and dispatching.
 *	The element is returned to the caller, who may add error-specific
 *	data to the element. The element is returned to the free list either
 *	when errorq_commit() is called and the element is asynchronously
 *	processed, or immediately when errorq_cancel() is called.
 *
 * void errorq_commit(errorq, errorq_elem, flag);
 *
 *	Commit an errorq element (eqep) for dispatching; see
 *	errorq_dispatch().
 *
 * void errorq_cancel(errorq, errorq_elem);
 *
 *	Cancel a pending errorq element reservation. The errorq element is
 *	returned to the free list upon cancelation.
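 *
 * Example
 *
 *	As a rough sketch of how the interfaces above fit together (the queue
 *	name, payload structure, and handler names are hypothetical and not
 *	part of this file), a subsystem might create a queue from passive
 *	context and dispatch into it from its high-level error handler. The
 *	callback arguments follow the eq_func() invocation made by
 *	errorq_drain() below:
 *
 *	    static errorq_t *my_errq;
 *
 *	    static void
 *	    my_drain(void *private, const void *data, const errorq_elem_t *ep)
 *	    {
 *		    const my_err_t *mep = data;
 *		    cmn_err(CE_WARN, "hw error: status %x", mep->me_status);
 *	    }
 *
 *	At attach or initialization time, from passive kernel context:
 *
 *	    my_errq = errorq_create("my_errq", my_drain, NULL, 64,
 *		sizeof (my_err_t), 2, ERRORQ_VITAL);
 *
 *	At error time, from the trap or interrupt handler, passing a stack
 *	buffer that may be reused as soon as the call returns because the
 *	data is copied into the preallocated element:
 *
 *	    errorq_dispatch(my_errq, &my_err, sizeof (my_err), ERRORQ_ASYNC);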
 */

#include <sys/errorq_impl.h>
#include <sys/sysmacros.h>
#include <sys/machlock.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/spl.h>
#include <sys/dumphdr.h>
#include <sys/compress.h>
#include <sys/time.h>
#include <sys/panic.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>

static struct errorq_kstat errorq_kstat_template = {
	{ "dispatched", KSTAT_DATA_UINT64 },
	{ "dropped", KSTAT_DATA_UINT64 },
	{ "logged", KSTAT_DATA_UINT64 },
	{ "reserved", KSTAT_DATA_UINT64 },
	{ "reserve_fail", KSTAT_DATA_UINT64 },
	{ "committed", KSTAT_DATA_UINT64 },
	{ "commit_fail", KSTAT_DATA_UINT64 },
	{ "cancelled", KSTAT_DATA_UINT64 }
};

static uint64_t errorq_lost = 0;
static errorq_t *errorq_list = NULL;
static kmutex_t errorq_lock;
static uint64_t errorq_vitalmin = 5;

static uint_t
errorq_intr(caddr_t eqp)
{
	errorq_drain((errorq_t *)eqp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Create a new error queue with the specified properties and add a software
 * interrupt handler and kstat for it. This function must be called from
 * passive kernel context with no locks held that can prevent a sleeping
 * memory allocation from occurring. This function will return NULL if the
 * softint or kstat for this queue cannot be created.
 */
errorq_t *
errorq_create(const char *name, errorq_func_t func, void *private,
    ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
{
	errorq_t *eqp = kmem_alloc(sizeof (errorq_t), KM_SLEEP);
	ddi_iblock_cookie_t ibc = (ddi_iblock_cookie_t)(uintptr_t)ipltospl(ipl);
	dev_info_t *dip = ddi_root_node();

	errorq_elem_t *eep;
	ddi_softintr_t id = NULL;
	caddr_t data;

	ASSERT(qlen != 0 && size != 0);
	ASSERT(ipl > 0 && ipl <= LOCK_LEVEL);

	/*
	 * If a queue is created very early in boot before device tree
	 * services are available, the queue softint handler cannot be
	 * created. We manually drain these queues and create their softint
	 * handlers when it is safe to do so as part of errorq_init(), below.
	 */
	if (modrootloaded && ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id,
	    &ibc, NULL, errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "errorq_create: failed to register "
		    "IPL %u softint for queue %s", ipl, name);
		kmem_free(eqp, sizeof (errorq_t));
		return (NULL);
	}

	if ((eqp->eq_ksp = kstat_create("unix", 0, name, "errorq",
	    KSTAT_TYPE_NAMED, sizeof (struct errorq_kstat) /
	    sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL)) == NULL) {
		cmn_err(CE_WARN, "errorq_create: failed to create kstat "
		    "for queue %s", name);
		if (id != NULL)
			ddi_remove_softintr(id);
		kmem_free(eqp, sizeof (errorq_t));
		return (NULL);
	}

	bcopy(&errorq_kstat_template, &eqp->eq_kstat,
	    sizeof (struct errorq_kstat));
	eqp->eq_ksp->ks_data = &eqp->eq_kstat;
	eqp->eq_ksp->ks_private = eqp;
	kstat_install(eqp->eq_ksp);

	(void) strncpy(eqp->eq_name, name, ERRORQ_NAMELEN);
	eqp->eq_name[ERRORQ_NAMELEN] = '\0';
	eqp->eq_func = func;
	eqp->eq_private = private;
	eqp->eq_data = kmem_alloc(qlen * size, KM_SLEEP);
	eqp->eq_qlen = qlen;
	eqp->eq_size = size;
	eqp->eq_ipl = ipl;
	eqp->eq_flags = flags | ERRORQ_ACTIVE;
	eqp->eq_id = id;
	mutex_init(&eqp->eq_lock, NULL, MUTEX_DEFAULT, NULL);
	eqp->eq_elems = kmem_alloc(qlen * sizeof (errorq_elem_t), KM_SLEEP);
	eqp->eq_phead = NULL;
	eqp->eq_ptail = NULL;
	eqp->eq_pend = NULL;
	eqp->eq_dump = NULL;
	eqp->eq_free = eqp->eq_elems;

	/*
	 * Iterate over the array of errorq_elem_t structures and place each
	 * one on the free list and set its data pointer.
	 */
	for (eep = eqp->eq_free, data = eqp->eq_data; qlen > 1; qlen--) {
		eep->eqe_next = NULL;
		eep->eqe_dump = NULL;
		eep->eqe_prev = eep + 1;
		eep->eqe_data = data;
		data += size;
		eep++;
	}

	eep->eqe_next = NULL;
	eep->eqe_prev = NULL;
	eep->eqe_data = data;
	eep->eqe_dump = NULL;

	/*
	 * Once the errorq is initialized, add it to the global list of
	 * queues, and then return a pointer to the new queue to the caller.
	 */
	mutex_enter(&errorq_lock);
	eqp->eq_next = errorq_list;
	errorq_list = eqp;
	mutex_exit(&errorq_lock);

	return (eqp);
}

/*
 * Create a new errorq as if by errorq_create(), but set the ERRORQ_NVLIST
 * flag and initialize each element to have the start of its data region used
 * as an errorq_nvelem_t with a nvlist allocator that consumes the data region.
 */
errorq_t *
errorq_nvcreate(const char *name, errorq_func_t func, void *private,
    ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
{
	errorq_t *eqp;
	errorq_elem_t *eep;

	eqp = errorq_create(name, func, private, qlen,
	    size + sizeof (errorq_nvelem_t), ipl, flags | ERRORQ_NVLIST);

	if (eqp == NULL)
		return (NULL);

	mutex_enter(&eqp->eq_lock);

	for (eep = eqp->eq_elems; qlen != 0; eep++, qlen--) {
		errorq_nvelem_t *eqnp = eep->eqe_data;
		eqnp->eqn_buf = (char *)eqnp + sizeof (errorq_nvelem_t);
		eqnp->eqn_nva = fm_nva_xcreate(eqnp->eqn_buf, size);
	}

	mutex_exit(&eqp->eq_lock);
	return (eqp);
}

/*
 * To destroy an error queue, we mark it as disabled and then explicitly drain
 * all pending errors. Once the drain is complete, we can remove the queue
 * from the global list of queues examined by errorq_panic(), and then free
 * the various queue data structures.
 * The caller must use some higher-level abstraction (e.g. disabling an error
 * interrupt) to ensure that no one will attempt to enqueue new errors while
 * we are freeing this queue.
 */
void
errorq_destroy(errorq_t *eqp)
{
	errorq_t *p, **pp;
	errorq_elem_t *eep;
	ulong_t i;

	ASSERT(eqp != NULL);
	eqp->eq_flags &= ~ERRORQ_ACTIVE;
	errorq_drain(eqp);

	mutex_enter(&errorq_lock);
	pp = &errorq_list;

	for (p = errorq_list; p != NULL; p = p->eq_next) {
		if (p == eqp) {
			*pp = p->eq_next;
			break;
		}
		pp = &p->eq_next;
	}

	mutex_exit(&errorq_lock);
	ASSERT(p != NULL);

	if (eqp->eq_flags & ERRORQ_NVLIST) {
		for (eep = eqp->eq_elems, i = 0; i < eqp->eq_qlen; i++, eep++) {
			errorq_nvelem_t *eqnp = eep->eqe_data;
			fm_nva_xdestroy(eqnp->eqn_nva);
		}
	}

	mutex_destroy(&eqp->eq_lock);
	kstat_delete(eqp->eq_ksp);

	if (eqp->eq_id != NULL)
		ddi_remove_softintr(eqp->eq_id);

	kmem_free(eqp->eq_elems, eqp->eq_qlen * sizeof (errorq_elem_t));
	kmem_free(eqp->eq_data, eqp->eq_qlen * eqp->eq_size);

	kmem_free(eqp, sizeof (errorq_t));
}

/*
 * Dispatch a new error into the queue for later processing. The specified
 * data buffer is copied into a preallocated queue element. If 'len' is
 * smaller than the queue element size, the remainder of the queue element is
 * filled with zeroes. This function may be called from any context subject
 * to the Platform Considerations described above.
 */
void
errorq_dispatch(errorq_t *eqp, const void *data, size_t len, uint_t flag)
{
	errorq_elem_t *eep, *old;

	if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&errorq_lost, 1);
		return; /* drop error if queue is uninitialized or disabled */
	}

	while ((eep = eqp->eq_free) != NULL) {
		if (casptr(&eqp->eq_free, eep, eep->eqe_prev) == eep)
			break;
	}

	if (eep == NULL) {
		atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
		return;
	}

	ASSERT(len <= eqp->eq_size);
	bcopy(data, eep->eqe_data, MIN(eqp->eq_size, len));

	if (len < eqp->eq_size)
		bzero((caddr_t)eep->eqe_data + len, eqp->eq_size - len);

	for (;;) {
		old = eqp->eq_pend;
		eep->eqe_prev = old;
		membar_producer();

		if (casptr(&eqp->eq_pend, old, eep) == old)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_dispatched.value.ui64, 1);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}

/*
 * Drain the specified error queue by calling eq_func() for each pending error.
 * This function must be called at or below LOCK_LEVEL or from panic context.
 * In order to synchronize with other attempts to drain the queue, we acquire
 * the adaptive eq_lock, blocking other consumers. Once this lock is held,
 * we must use compare-and-swap to move the pending list to the processing
 * list and to return elements to the free list in order to synchronize
 * with producers, who do not acquire any locks and only use compare-and-swap.
 *
 * An additional constraint on this function is that if the system panics
 * while this function is running, the panic code must be able to detect and
 * handle all intermediate states and correctly dequeue all errors. The
 * errorq_panic() function below will be used for detecting and handling
 * these intermediate states.
 * The comments in errorq_drain() below explain how we make sure each
 * intermediate state is distinct and consistent.
 */
void
errorq_drain(errorq_t *eqp)
{
	errorq_elem_t *eep, *fep, *dep;

	ASSERT(eqp != NULL);
	mutex_enter(&eqp->eq_lock);

	/*
	 * If there are one or more pending errors, set eq_ptail to point to
	 * the first element on the pending list and then attempt to compare-
	 * and-swap NULL to the pending list. We use membar_producer() to
	 * make sure that eq_ptail will be visible to errorq_panic() below
	 * before the pending list is NULLed out. This section is labeled
	 * case (1) for errorq_panic, below. If eq_ptail is not yet set (1A)
	 * eq_pend has all the pending errors. If casptr fails or has not
	 * been called yet (1B), eq_pend still has all the pending errors.
	 * If casptr succeeds (1C), eq_ptail has all the pending errors.
	 */
	while ((eep = eqp->eq_pend) != NULL) {
		eqp->eq_ptail = eep;
		membar_producer();

		if (casptr(&eqp->eq_pend, eep, NULL) == eep)
			break;
	}

	/*
	 * If no errors were pending, assert that eq_ptail is set to NULL,
	 * drop the consumer lock, and return without doing anything.
	 */
	if (eep == NULL) {
		ASSERT(eqp->eq_ptail == NULL);
		mutex_exit(&eqp->eq_lock);
		return;
	}

	/*
	 * Now iterate from eq_ptail (a.k.a. eep, the newest error) to the
	 * oldest error, setting the eqe_next pointer so that we can iterate
	 * over the errors from oldest to newest. We use membar_producer()
	 * to make sure that these stores are visible before we set eq_phead.
	 * If we panic before, during, or just after this loop (case 2),
	 * errorq_panic() will simply redo this work, as described below.
	 */
	for (eep->eqe_next = NULL; eep->eqe_prev != NULL; eep = eep->eqe_prev)
		eep->eqe_prev->eqe_next = eep;
	membar_producer();

	/*
	 * Now set eq_phead to the head of the processing list (the oldest
	 * error) and issue another membar_producer() to make sure that
	 * eq_phead is seen as non-NULL before we clear eq_ptail. If we panic
	 * after eq_phead is set (case 3), we will detect and log these errors
	 * in errorq_panic(), as described below.
	 */
	eqp->eq_phead = eep;
	membar_producer();

	eqp->eq_ptail = NULL;
	membar_producer();

	/*
	 * If we enter from errorq_panic_drain(), we may already have
	 * errorq elements on the dump list. Find the tail of
	 * the list ready for append.
	 */
	if (panicstr && (dep = eqp->eq_dump) != NULL) {
		while (dep->eqe_dump != NULL)
			dep = dep->eqe_dump;
	}

	/*
	 * Now iterate over the processing list from oldest (eq_phead) to
	 * newest and log each error. Once an error is logged, we use
	 * compare-and-swap to return it to the free list. If we panic before,
	 * during, or after calling eq_func() (case 4), the error will still be
	 * found on eq_phead and will be logged in errorq_panic below.
	 */

	while ((eep = eqp->eq_phead) != NULL) {
		eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
		eqp->eq_kstat.eqk_logged.value.ui64++;

		eqp->eq_phead = eep->eqe_next;
		membar_producer();

		eep->eqe_next = NULL;

		for (;;) {
			fep = eqp->eq_free;
			eep->eqe_prev = fep;
			membar_producer();

			if (casptr(&eqp->eq_free, fep, eep) == fep)
				break;
		}

		/*
		 * On panic, we add the element to the dump list for each
		 * nvlist errorq. Elements are stored oldest to newest.
		 */
		if (panicstr && (eqp->eq_flags & ERRORQ_NVLIST)) {
			if (eqp->eq_dump == NULL)
				dep = eqp->eq_dump = eep;
			else
				dep = dep->eqe_dump = eep;
			membar_producer();
		}
	}

	mutex_exit(&eqp->eq_lock);
}

/*
 * Now that device tree services are available, set up the soft interrupt
 * handlers for any queues that were created early in boot. We then
 * manually drain these queues to report any pending early errors.
 */
void
errorq_init(void)
{
	dev_info_t *dip = ddi_root_node();
	ddi_softintr_t id;
	errorq_t *eqp;

	ASSERT(modrootloaded != 0);
	ASSERT(dip != NULL);

	mutex_enter(&errorq_lock);

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		ddi_iblock_cookie_t ibc =
		    (ddi_iblock_cookie_t)(uintptr_t)ipltospl(eqp->eq_ipl);

		if (eqp->eq_id != NULL)
			continue; /* softint already initialized */

		if (ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id, &ibc, NULL,
		    errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
			panic("errorq_init: failed to register IPL %u softint "
			    "for queue %s", eqp->eq_ipl, eqp->eq_name);
		}

		eqp->eq_id = id;
		errorq_drain(eqp);
	}

	mutex_exit(&errorq_lock);
}

/*
 * This function is designed to be called from panic context only, and
 * therefore does not need to acquire errorq_lock when iterating over
 * errorq_list. This function must be called no more than once for each
 * 'what' value (if you change this then review the manipulation of 'dep').
 */
static uint64_t
errorq_panic_drain(uint_t what)
{
	errorq_elem_t *eep, *nep, *fep, *dep;
	errorq_t *eqp;
	uint64_t loggedtmp;
	uint64_t logged = 0;

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		if ((eqp->eq_flags & (ERRORQ_VITAL | ERRORQ_NVLIST)) != what)
			continue; /* do not drain this queue on this pass */

		loggedtmp = eqp->eq_kstat.eqk_logged.value.ui64;

		/*
		 * In case (1B) above, eq_ptail may be set but the casptr may
		 * not have been executed yet or may have failed. Either way,
		 * we must log errors in chronological order. So we search
		 * the pending list for the error pointed to by eq_ptail. If
		 * it is found, we know that all subsequent errors are also
		 * still on the pending list, so just NULL out eq_ptail and let
		 * errorq_drain(), below, take care of the logging.
		 */
		for (eep = eqp->eq_pend; eep != NULL; eep = eep->eqe_prev) {
			if (eep == eqp->eq_ptail) {
				ASSERT(eqp->eq_phead == NULL);
				eqp->eq_ptail = NULL;
				break;
			}
		}

		/*
		 * In cases (1C) and (2) above, eq_ptail will be set to the
		 * newest error on the processing list but eq_phead will still
		 * be NULL. We set the eqe_next pointers so we can iterate
		 * over the processing list in order from oldest error to the
		 * newest error. We then set eq_phead to point to the oldest
		 * error and fall into the for-loop below.
		 */
		if (eqp->eq_phead == NULL && (eep = eqp->eq_ptail) != NULL) {
			for (eep->eqe_next = NULL; eep->eqe_prev != NULL;
			    eep = eep->eqe_prev)
				eep->eqe_prev->eqe_next = eep;

			eqp->eq_phead = eep;
			eqp->eq_ptail = NULL;
		}

		/*
		 * In cases (3) and (4) above (or after case (1C/2) handling),
		 * eq_phead will be set to the oldest error on the processing
		 * list. We log each error and return it to the free list.
		 *
		 * Unlike errorq_drain(), we don't need to worry about updating
		 * eq_phead because errorq_panic() will be called at most once.
		 * However, we must use casptr to update the freelist in case
		 * errors are still being enqueued during panic.
		 */
		for (eep = eqp->eq_phead; eep != NULL; eep = nep) {
			eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
			eqp->eq_kstat.eqk_logged.value.ui64++;

			nep = eep->eqe_next;
			eep->eqe_next = NULL;

			for (;;) {
				fep = eqp->eq_free;
				eep->eqe_prev = fep;
				membar_producer();

				if (casptr(&eqp->eq_free, fep, eep) == fep)
					break;
			}

			/*
			 * On panic, we add the element to the dump list for
			 * each nvlist errorq, stored oldest to newest.
			 */
			if (eqp->eq_flags & ERRORQ_NVLIST) {
				if (eqp->eq_dump == NULL)
					dep = eqp->eq_dump = eep;
				else
					dep = dep->eqe_dump = eep;
				membar_producer();
			}
		}

		/*
		 * Now go ahead and drain any other errors on the pending list.
		 * This call transparently handles case (1A) above, as well as
		 * any other errors that were dispatched after errorq_drain()
		 * completed its first compare-and-swap.
		 */
		errorq_drain(eqp);

		logged += eqp->eq_kstat.eqk_logged.value.ui64 - loggedtmp;
	}
	return (logged);
}

/*
 * Drain all error queues - called only from panic context. Some drain
 * functions may enqueue errors to ERRORQ_NVLIST error queues so that
 * they may be written out in the panic dump - so ERRORQ_NVLIST queues
 * must be drained last. Drain ERRORQ_VITAL queues before nonvital queues
 * so that vital errors get to fill the ERRORQ_NVLIST queues first, and
 * do not drain the nonvital queues if there are many vital errors.
 */
void
errorq_panic(void)
{
	ASSERT(panicstr != NULL);

	if (errorq_panic_drain(ERRORQ_VITAL) <= errorq_vitalmin)
		(void) errorq_panic_drain(0);
	(void) errorq_panic_drain(ERRORQ_VITAL | ERRORQ_NVLIST);
	(void) errorq_panic_drain(ERRORQ_NVLIST);
}

/*
 * Reserve an error queue element for later processing and dispatching. The
 * element is returned to the caller, who may add error-specific data to the
 * element. The element is returned to the free list either when
 * errorq_commit() is called and the element is asynchronously processed, or
 * immediately when errorq_cancel() is called.
 */
errorq_elem_t *
errorq_reserve(errorq_t *eqp)
{
	errorq_elem_t *eqep;

	if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&errorq_lost, 1);
		return (NULL);
	}

	while ((eqep = eqp->eq_free) != NULL) {
		if (casptr(&eqp->eq_free, eqep, eqep->eqe_prev) == eqep)
			break;
	}

	if (eqep == NULL) {
		atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
		return (NULL);
	}

	if (eqp->eq_flags & ERRORQ_NVLIST) {
		errorq_nvelem_t *eqnp = eqep->eqe_data;
		nv_alloc_reset(eqnp->eqn_nva);
		eqnp->eqn_nvl = fm_nvlist_create(eqnp->eqn_nva);
	}

	atomic_add_64(&eqp->eq_kstat.eqk_reserved.value.ui64, 1);
	return (eqep);
}

/*
 * Commit an errorq element (eqep) for dispatching.
 * This function may be called from any context subject
 * to the Platform Considerations described above.
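 *
 * As a sketch of the intended reserve/commit flow for an ERRORQ_NVLIST
 * queue (the queue pointer, ereport class string, and member names below
 * are illustrative only and not defined in this file):
 *
 *	errorq_elem_t *eqep;
 *	nvlist_t *nvl;
 *
 *	if ((eqep = errorq_reserve(nv_errq)) != NULL) {
 *		nvl = errorq_elem_nvl(nv_errq, eqep);
 *		(void) nvlist_add_string(nvl, "class", "ereport.example");
 *		(void) nvlist_add_uint64(nvl, "ena", ena);
 *		errorq_commit(nv_errq, eqep, ERRORQ_ASYNC);
 *	}
 *
 * If the element cannot be filled in, errorq_cancel() returns the
 * reservation to the free list instead.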
 */
void
errorq_commit(errorq_t *eqp, errorq_elem_t *eqep, uint_t flag)
{
	errorq_elem_t *old;

	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64, 1);
		return;
	}

	for (;;) {
		old = eqp->eq_pend;
		eqep->eqe_prev = old;
		membar_producer();

		if (casptr(&eqp->eq_pend, old, eqep) == old)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_committed.value.ui64, 1);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}

/*
 * Cancel an errorq element reservation by returning the specified element
 * to the free list. Duplicate or invalid frees are not supported.
 */
void
errorq_cancel(errorq_t *eqp, errorq_elem_t *eqep)
{
	errorq_elem_t *fep;

	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE))
		return;

	for (;;) {
		fep = eqp->eq_free;
		eqep->eqe_prev = fep;
		membar_producer();

		if (casptr(&eqp->eq_free, fep, eqep) == fep)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_cancelled.value.ui64, 1);
}

/*
 * Write elements on the dump list of each nvlist errorq to the dump device.
 * Upon reboot, fmd(1M) will extract and replay them for diagnosis.
 */
void
errorq_dump(void)
{
	errorq_elem_t *eep;
	errorq_t *eqp;

	if (ereport_dumpbuf == NULL)
		return; /* reboot or panic before errorq is even set up */

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		if (!(eqp->eq_flags & ERRORQ_NVLIST) ||
		    !(eqp->eq_flags & ERRORQ_ACTIVE))
			continue; /* do not dump this queue on panic */

		for (eep = eqp->eq_dump; eep != NULL; eep = eep->eqe_dump) {
			errorq_nvelem_t *eqnp = eep->eqe_data;
			size_t len = 0;
			erpt_dump_t ed;
			int err;

			(void) nvlist_size(eqnp->eqn_nvl,
			    &len, NV_ENCODE_NATIVE);

			if (len > ereport_dumplen || len == 0) {
				cmn_err(CE_WARN, "%s: unable to save error "
				    "report %p due to size %lu\n",
				    eqp->eq_name, (void *)eep, len);
				continue;
			}

			if ((err = nvlist_pack(eqnp->eqn_nvl,
			    (char **)&ereport_dumpbuf, &ereport_dumplen,
			    NV_ENCODE_NATIVE, KM_NOSLEEP)) != 0) {
				cmn_err(CE_WARN, "%s: unable to save error "
				    "report %p due to pack error %d\n",
				    eqp->eq_name, (void *)eep, err);
				continue;
			}

			ed.ed_magic = ERPT_MAGIC;
			ed.ed_chksum = checksum32(ereport_dumpbuf, len);
			ed.ed_size = (uint32_t)len;
			ed.ed_pad = 0;
			ed.ed_hrt_nsec = 0;
			ed.ed_hrt_base = panic_hrtime;
			ed.ed_tod_base.sec = panic_hrestime.tv_sec;
			ed.ed_tod_base.nsec = panic_hrestime.tv_nsec;

			dumpvp_write(&ed, sizeof (ed));
			dumpvp_write(ereport_dumpbuf, len);
		}
	}
}

nvlist_t *
errorq_elem_nvl(errorq_t *eqp, const errorq_elem_t *eqep)
{
	errorq_nvelem_t *eqnp = eqep->eqe_data;

	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);

	return (eqnp->eqn_nvl);
}

nv_alloc_t *
errorq_elem_nva(errorq_t *eqp, const errorq_elem_t *eqep)
{
	errorq_nvelem_t *eqnp = eqep->eqe_data;

	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);

	return (eqnp->eqn_nva);
}

/*
 * Reserve a new element and duplicate the data of the original into it.
 */
void *
errorq_elem_dup(errorq_t *eqp, const errorq_elem_t *eqep, errorq_elem_t **neqep)
{
	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE);
	ASSERT(!(eqp->eq_flags & ERRORQ_NVLIST));

	if ((*neqep = errorq_reserve(eqp)) == NULL)
		return (NULL);

	bcopy(eqep->eqe_data, (*neqep)->eqe_data, eqp->eq_size);
	return ((*neqep)->eqe_data);
}