/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Kernel Error Queues
 *
 * A common problem when handling hardware error traps and interrupts is that
 * these errors frequently must be handled at high interrupt level, where
 * reliably producing error messages and safely examining and manipulating
 * other kernel state may not be possible.  The kernel error queue primitive is
 * a common set of routines that allow a subsystem to maintain a queue of
 * errors that can be processed by an explicit call from a safe context or by a
 * soft interrupt that fires at a specific lower interrupt level.  The queue
 * management code also ensures that if the system panics, all in-transit
 * errors are logged prior to reset.  Each queue has an associated kstat for
 * observing the number of errors dispatched and logged, and mdb(1) debugging
 * support is provided for live and post-mortem observability.
 *
 * Memory Allocation
 *
 *	All of the queue data structures are allocated in advance as part of
 *	the errorq_create() call.  No additional memory allocations are
 *	performed as part of errorq_dispatch(), errorq_reserve(),
 *	errorq_commit() or errorq_drain().  This design facilitates reliable
 *	error queue processing even when the system is low on memory, and
 *	ensures that errorq_dispatch() can be called from any context.  When
 *	the queue is created, the maximum queue length is specified as a
 *	parameter to errorq_create() or errorq_nvcreate().  This length should
 *	represent a reasonable upper bound on the number of simultaneous
 *	errors.  If errorq_dispatch() or errorq_reserve() is invoked and no
 *	free queue elements are available, the error is dropped and will not
 *	be logged.  Typically, the queue will only be exhausted by an error
 *	storm, and in this case the earlier errors provide the most important
 *	data for analysis.  When a new error is dispatched, the error data is
 *	copied into the preallocated queue element so that the caller's buffer
 *	can be reused.
 *
 *	When a new error is reserved, an element is moved from the free list
 *	and returned to the caller.  The element buffer data, eqe_data, may be
 *	managed by the caller and dispatched to the errorq by calling
 *	errorq_commit().  This is useful for additions to errorq's created
 *	with errorq_nvcreate() to handle name-value pair (nvpair) data.  See
 *	below for a discussion on nvlist errorq's.
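 *
 *	As a rough illustration only (a hypothetical sketch: the queue pointer
 *	'my_eqp', the ereport member name "afar", and the value 'afar' are
 *	made up for this example and are not defined by this file), the
 *	reserve/commit flow for an nvlist errorq might look like:
 *
 *		errorq_elem_t *eqep;
 *
 *		if ((eqep = errorq_reserve(my_eqp)) != NULL) {
 *			nvlist_t *nvl = errorq_elem_nvl(my_eqp, eqep);
 *			(void) nvlist_add_uint64(nvl, "afar", afar);
 *			errorq_commit(my_eqp, eqep, ERRORQ_ASYNC);
 *		}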
 *
 * Queue Drain Callback
 *
 *	When the error queue is drained, the caller's queue drain callback is
 *	invoked with a pointer to the saved error data.  This function may be
 *	called from passive kernel context or soft interrupt context at or
 *	below LOCK_LEVEL, or as part of panic().  As such, the callback should
 *	basically only be calling cmn_err (but NOT with the CE_PANIC flag).
 *	The callback must not call panic(), attempt to allocate memory, or wait
 *	on a condition variable.  The callback may not call errorq_destroy()
 *	or errorq_drain() on the same error queue that called it.
 *
 *	The queue drain callback will always be called for each pending error
 *	in the order in which errors were enqueued (oldest to newest).  The
 *	queue drain callback is guaranteed to provide at *least* once semantics
 *	for all errors that are successfully dispatched (i.e. for which
 *	errorq_dispatch() has successfully completed).  If an unrelated panic
 *	occurs while the queue drain callback is running on a vital queue, the
 *	panic subsystem will continue the queue drain and the callback may be
 *	invoked again for the same error.  Therefore, the callback should
 *	restrict itself to logging messages and taking other actions that are
 *	not destructive if repeated.
 *
 * Name-Value Pair Error Queues
 *
 *	During error handling, it may be more convenient to store error
 *	queue element data as a fixed buffer of name-value pairs.  The
 *	nvpair library allows construction and destruction of nvlists in
 *	pre-allocated memory buffers.
 *
 *	Error queues created via errorq_nvcreate() store queue element
 *	data as fixed buffer nvlists (ereports).  errorq_reserve()
 *	allocates an errorq element from eqp->eq_free and returns a valid
 *	pointer to an errorq_elem_t (queue element) and a pre-allocated
 *	fixed buffer nvlist.  errorq_elem_nvl() is used to gain access
 *	to the nvlist to add name-value ereport members prior to
 *	dispatching the error queue element in errorq_commit().
 *
 *	Once dispatched, the drain function will return the element to
 *	eqp->eq_free and reset the associated nv_alloc structure.
 *	errorq_cancel() may be called to cancel a reserved element that was
 *	never dispatched (committed).  This is useful in cases where a
 *	programming error prevents a queue element from being dispatched.
 *
 * Queue Management
 *
 *	The queue element structures and error data buffers are allocated in
 *	two contiguous chunks as part of errorq_create() or errorq_nvcreate().
 *	Each queue element structure contains a next pointer,
 *	a previous pointer, and a pointer to the corresponding error data
 *	buffer.  The data buffer for a nvlist errorq is a shared buffer
 *	for the allocation of name-value pair lists.  The elements are kept on
 *	one of three lists:
 *
 *	Unused elements are kept on the free list, a singly-linked list pointed
 *	to by eqp->eq_free, and linked together using eqe_prev.  The eqe_next
 *	pointer is not used by the free list and will be set to NULL.
 *
 *	Pending errors are kept on the pending list, a singly-linked list
 *	pointed to by eqp->eq_pend, and linked together using eqe_prev.  This
 *	list is maintained in order from newest error to oldest.  The eqe_next
 *	pointer is not used by the pending list and will be set to NULL.
 *
 *	The processing list is a doubly-linked list pointed to by eqp->eq_phead
 *	(the oldest element) and eqp->eq_ptail (the newest element).  The
 *	eqe_next pointer is used to traverse from eq_phead to eq_ptail, and the
 *	eqe_prev pointer is used to traverse from eq_ptail to eq_phead.  Once a
 *	queue drain operation begins, the current pending list is moved to the
 *	processing list in a two-phase commit fashion, allowing the panic code
 *	to always locate and process all pending errors in the event that a
 *	panic occurs in the middle of queue processing.
 *
 *	A fourth list is maintained for nvlist errorqs.  The dump list,
 *	eq_dump, is used to link all errorq elements that should be stored
 *	in a crash dump file in the event of a system panic.  During
 *	errorq_panic(), the list is created and subsequently traversed
 *	in errorq_dump() during the final phases of a crash dump.
 *
 * Platform Considerations
 *
 *	In order to simplify their implementation, error queues make use of the
 *	C wrappers for compare-and-swap.  If the platform itself does not
 *	support compare-and-swap in hardware and the kernel emulation routines
 *	are used instead, then the context in which errorq_dispatch() can be
 *	safely invoked is further constrained by the implementation of the
 *	compare-and-swap emulation.  Specifically, if errorq_dispatch() is
 *	called from a code path that can be executed above ATOMIC_LEVEL on such
 *	a platform, the dispatch code could potentially deadlock unless the
 *	corresponding error interrupt is blocked or disabled prior to calling
 *	errorq_dispatch().  Error queues should therefore be deployed with
 *	caution on these platforms.
 *
 * Interfaces
 *
 * errorq_t *errorq_create(name, func, private, qlen, eltsize, ipl, flags);
 * errorq_t *errorq_nvcreate(name, func, private, qlen, eltsize, ipl, flags);
 *
 *	Create a new error queue with the specified name, callback, and
 *	properties.  A pointer to the new error queue is returned upon success,
 *	or NULL is returned to indicate that the queue could not be created.
 *	This function must be called from passive kernel context with no locks
 *	held that can prevent a sleeping memory allocation from occurring.
 *	errorq_create() will return failure if the queue kstats cannot be
 *	created, or if a soft interrupt handler cannot be registered.
 *
 *	The queue 'name' is a string that is recorded for live and post-mortem
 *	examination by a debugger.  The queue callback 'func' will be invoked
 *	for each error drained from the queue, and will receive the 'private'
 *	pointer as its first argument.  The callback must obey the rules for
 *	callbacks described above.  The queue will have maximum length 'qlen'
 *	and each element will be able to record up to 'eltsize' bytes of data.
 *	The queue's soft interrupt (see errorq_dispatch(), below) will fire
 *	at 'ipl', which should not exceed LOCK_LEVEL.  The queue 'flags' may
 *	include the following flag:
 *
 *	ERRORQ_VITAL - This queue contains information that is considered
 *	vital to problem diagnosis.  Error queues that are marked vital will
 *	be automatically drained by the panic subsystem prior to printing
 *	the panic messages to the console.
 *
 * void errorq_destroy(errorq);
 *
 *	Destroy the specified error queue.
 *	The queue is drained of any pending elements and these are logged
 *	before errorq_destroy() returns.  Once errorq_destroy() begins
 *	draining the queue, any simultaneous calls to dispatch errors will
 *	result in the errors being dropped.  The caller must invoke a
 *	higher-level abstraction (e.g. disabling an error interrupt) to ensure
 *	that error handling code does not attempt to dispatch errors to the
 *	queue while it is being freed.
 *
 * void errorq_dispatch(errorq, data, len, flag);
 *
 *	Attempt to enqueue the specified error data.  If a free queue element
 *	is available, the data is copied into a free element and placed on a
 *	pending list.  If no free queue element is available, the error is
 *	dropped.  The data length (len) is specified in bytes and should not
 *	exceed the queue's maximum element size.  If the data length is less
 *	than the maximum element size, the remainder of the queue element is
 *	filled with zeroes.  The flag parameter should be one of:
 *
 *	ERRORQ_ASYNC - Schedule a soft interrupt at the previously specified
 *	IPL to asynchronously drain the queue on behalf of the caller.
 *
 *	ERRORQ_SYNC - Do not schedule a soft interrupt to drain the queue.
 *	The caller is presumed to be calling errorq_drain() or panic() in
 *	the near future in order to drain the queue and log the error.
 *
 *	The errorq_dispatch() function may be called from any context, subject
 *	to the Platform Considerations described above.
 *
 * void errorq_drain(errorq);
 *
 *	Drain the error queue of all pending errors.  The queue's callback
 *	function is invoked for each error in order from oldest to newest.
 *	This function may be used at or below LOCK_LEVEL or from panic context.
 *
 * errorq_elem_t *errorq_reserve(errorq);
 *
 *	Reserve an error queue element for later processing and dispatching.
 *	The element is returned to the caller who may add error-specific data
 *	to the element.  The element is returned to the free list either when
 *	errorq_commit() is called and the element is asynchronously processed,
 *	or immediately when errorq_cancel() is called.
 *
 * void errorq_commit(errorq, errorq_elem, flag);
 *
 *	Commit an errorq element (eqep) for dispatching; see
 *	errorq_dispatch().
 *
 * void errorq_cancel(errorq, errorq_elem);
 *
 *	Cancel a pending errorq element reservation.  The errorq element is
 *	returned to the free list upon cancellation.
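 *
 * Example
 *
 *	The following is a minimal usage sketch only; the "foo" subsystem,
 *	its foo_error_t structure, the drain callback, and the queue length
 *	and ipl values shown are all hypothetical and are not defined by this
 *	file.  The callback parameter list is assumed to follow the
 *	errorq_func_t convention (private pointer, error data, queue element):
 *
 *		static errorq_t *foo_errorq;
 *
 *		static void
 *		foo_drain_func(void *private, const void *data,
 *		    const errorq_elem_t *eqep)
 *		{
 *			const foo_error_t *fp = data;
 *			cmn_err(CE_WARN, "foo error: afar=%llx",
 *			    (u_longlong_t)fp->foo_afar);
 *		}
 *
 *	At initialization time, from passive kernel context:
 *
 *		foo_errorq = errorq_create("foo_error_queue", foo_drain_func,
 *		    NULL, 64, sizeof (foo_error_t), 2, ERRORQ_VITAL);
 *
 *	At error trap time:
 *
 *		errorq_dispatch(foo_errorq, &foo_err, sizeof (foo_err),
 *		    ERRORQ_ASYNC);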
 */

#include <sys/errorq_impl.h>
#include <sys/sysmacros.h>
#include <sys/machlock.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/spl.h>
#include <sys/dumphdr.h>
#include <sys/compress.h>
#include <sys/time.h>
#include <sys/panic.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>

static struct errorq_kstat errorq_kstat_template = {
	{ "dispatched", KSTAT_DATA_UINT64 },
	{ "dropped", KSTAT_DATA_UINT64 },
	{ "logged", KSTAT_DATA_UINT64 },
	{ "reserved", KSTAT_DATA_UINT64 },
	{ "reserve_fail", KSTAT_DATA_UINT64 },
	{ "committed", KSTAT_DATA_UINT64 },
	{ "commit_fail", KSTAT_DATA_UINT64 },
	{ "cancelled", KSTAT_DATA_UINT64 }
};

static uint64_t errorq_lost = 0;
static errorq_t *errorq_list = NULL;
static kmutex_t errorq_lock;
static uint64_t errorq_vitalmin = 5;

static uint_t
errorq_intr(caddr_t eqp)
{
	errorq_drain((errorq_t *)eqp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Create a new error queue with the specified properties and add a software
 * interrupt handler and kstat for it.  This function must be called from
 * passive kernel context with no locks held that can prevent a sleeping
 * memory allocation from occurring.  This function will return NULL if the
 * softint or kstat for this queue cannot be created.
 */
errorq_t *
errorq_create(const char *name, errorq_func_t func, void *private,
    ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
{
	errorq_t *eqp = kmem_alloc(sizeof (errorq_t), KM_SLEEP);
	ddi_iblock_cookie_t ibc = (ddi_iblock_cookie_t)(uintptr_t)ipltospl(ipl);
	dev_info_t *dip = ddi_root_node();

	errorq_elem_t *eep;
	ddi_softintr_t id = NULL;
	caddr_t data;

	ASSERT(qlen != 0 && size != 0);
	ASSERT(ipl > 0 && ipl <= LOCK_LEVEL);

	/*
	 * If a queue is created very early in boot before device tree services
	 * are available, the queue softint handler cannot be created.  We
	 * manually drain these queues and create their softint handlers when
	 * it is safe to do so as part of errorq_init(), below.
	 */
	if (modrootloaded && ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id,
	    &ibc, NULL, errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "errorq_create: failed to register "
		    "IPL %u softint for queue %s", ipl, name);
		kmem_free(eqp, sizeof (errorq_t));
		return (NULL);
	}

	if ((eqp->eq_ksp = kstat_create("unix", 0, name, "errorq",
	    KSTAT_TYPE_NAMED, sizeof (struct errorq_kstat) /
	    sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL)) == NULL) {
		cmn_err(CE_WARN, "errorq_create: failed to create kstat "
		    "for queue %s", name);
		if (id != NULL)
			ddi_remove_softintr(id);
		kmem_free(eqp, sizeof (errorq_t));
		return (NULL);
	}

	bcopy(&errorq_kstat_template, &eqp->eq_kstat,
	    sizeof (struct errorq_kstat));
	eqp->eq_ksp->ks_data = &eqp->eq_kstat;
	eqp->eq_ksp->ks_private = eqp;
	kstat_install(eqp->eq_ksp);

	(void) strncpy(eqp->eq_name, name, ERRORQ_NAMELEN);
	eqp->eq_name[ERRORQ_NAMELEN] = '\0';
	eqp->eq_func = func;
	eqp->eq_private = private;
	eqp->eq_data = kmem_alloc(qlen * size, KM_SLEEP);
	eqp->eq_qlen = qlen;
	eqp->eq_size = size;
	eqp->eq_ipl = ipl;
	eqp->eq_flags = flags | ERRORQ_ACTIVE;
	eqp->eq_id = id;
	mutex_init(&eqp->eq_lock, NULL, MUTEX_DEFAULT, NULL);
	eqp->eq_elems = kmem_alloc(qlen * sizeof (errorq_elem_t), KM_SLEEP);
	eqp->eq_phead = NULL;
	eqp->eq_ptail = NULL;
	eqp->eq_pend = NULL;
	eqp->eq_dump = NULL;
	eqp->eq_free = eqp->eq_elems;

	/*
	 * Iterate over the array of errorq_elem_t structures and place each
	 * one on the free list and set its data pointer.
	 */
	for (eep = eqp->eq_free, data = eqp->eq_data; qlen > 1; qlen--) {
		eep->eqe_next = NULL;
		eep->eqe_dump = NULL;
		eep->eqe_prev = eep + 1;
		eep->eqe_data = data;
		data += size;
		eep++;
	}

	eep->eqe_next = NULL;
	eep->eqe_prev = NULL;
	eep->eqe_data = data;
	eep->eqe_dump = NULL;

	/*
	 * Once the errorq is initialized, add it to the global list of queues,
	 * and then return a pointer to the new queue to the caller.
	 */
	mutex_enter(&errorq_lock);
	eqp->eq_next = errorq_list;
	errorq_list = eqp;
	mutex_exit(&errorq_lock);

	return (eqp);
}

/*
 * Create a new errorq as if by errorq_create(), but set the ERRORQ_NVLIST
 * flag and initialize each element to have the start of its data region used
 * as an errorq_nvelem_t with a nvlist allocator that consumes the data region.
 */
errorq_t *
errorq_nvcreate(const char *name, errorq_func_t func, void *private,
    ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
{
	errorq_t *eqp;
	errorq_elem_t *eep;

	eqp = errorq_create(name, func, private, qlen,
	    size + sizeof (errorq_nvelem_t), ipl, flags | ERRORQ_NVLIST);

	if (eqp == NULL)
		return (NULL);

	mutex_enter(&eqp->eq_lock);

	for (eep = eqp->eq_elems; qlen != 0; eep++, qlen--) {
		errorq_nvelem_t *eqnp = eep->eqe_data;
		eqnp->eqn_buf = (char *)eqnp + sizeof (errorq_nvelem_t);
		eqnp->eqn_nva = fm_nva_xcreate(eqnp->eqn_buf, size);
	}

	mutex_exit(&eqp->eq_lock);
	return (eqp);
}

/*
 * To destroy an error queue, we mark it as disabled and then explicitly drain
 * all pending errors.  Once the drain is complete, we can remove the queue
 * from the global list of queues examined by errorq_panic(), and then free
 * the various queue data structures.
 * The caller must use some higher-level
 * abstraction (e.g. disabling an error interrupt) to ensure that no one will
 * attempt to enqueue new errors while we are freeing this queue.
 */
void
errorq_destroy(errorq_t *eqp)
{
	errorq_t *p, **pp;
	errorq_elem_t *eep;
	ulong_t i;

	ASSERT(eqp != NULL);
	eqp->eq_flags &= ~ERRORQ_ACTIVE;
	errorq_drain(eqp);

	mutex_enter(&errorq_lock);
	pp = &errorq_list;

	for (p = errorq_list; p != NULL; p = p->eq_next) {
		if (p == eqp) {
			*pp = p->eq_next;
			break;
		}
		pp = &p->eq_next;
	}

	mutex_exit(&errorq_lock);
	ASSERT(p != NULL);

	if (eqp->eq_flags & ERRORQ_NVLIST) {
		for (eep = eqp->eq_elems, i = 0; i < eqp->eq_qlen; i++, eep++) {
			errorq_nvelem_t *eqnp = eep->eqe_data;
			fm_nva_xdestroy(eqnp->eqn_nva);
		}
	}

	mutex_destroy(&eqp->eq_lock);
	kstat_delete(eqp->eq_ksp);

	if (eqp->eq_id != NULL)
		ddi_remove_softintr(eqp->eq_id);

	kmem_free(eqp->eq_elems, eqp->eq_qlen * sizeof (errorq_elem_t));
	kmem_free(eqp->eq_data, eqp->eq_qlen * eqp->eq_size);

	kmem_free(eqp, sizeof (errorq_t));
}

/*
 * Dispatch a new error into the queue for later processing.  The specified
 * data buffer is copied into a preallocated queue element.  If 'len' is
 * smaller than the queue element size, the remainder of the queue element is
 * filled with zeroes.  This function may be called from any context subject
 * to the Platform Considerations described above.
 */
void
errorq_dispatch(errorq_t *eqp, const void *data, size_t len, uint_t flag)
{
	errorq_elem_t *eep, *old;

	if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&errorq_lost, 1);
		return; /* drop error if queue is uninitialized or disabled */
	}

	while ((eep = eqp->eq_free) != NULL) {
		if (casptr(&eqp->eq_free, eep, eep->eqe_prev) == eep)
			break;
	}

	if (eep == NULL) {
		atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
		return;
	}

	ASSERT(len <= eqp->eq_size);
	bcopy(data, eep->eqe_data, MIN(eqp->eq_size, len));

	if (len < eqp->eq_size)
		bzero((caddr_t)eep->eqe_data + len, eqp->eq_size - len);

	for (;;) {
		old = eqp->eq_pend;
		eep->eqe_prev = old;
		membar_producer();

		if (casptr(&eqp->eq_pend, old, eep) == old)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_dispatched.value.ui64, 1);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}

/*
 * Drain the specified error queue by calling eq_func() for each pending error.
 * This function must be called at or below LOCK_LEVEL or from panic context.
 * In order to synchronize with other attempts to drain the queue, we acquire
 * the adaptive eq_lock, blocking other consumers.  Once this lock is held,
 * we must use compare-and-swap to move the pending list to the processing
 * list and to return elements to the free list in order to synchronize
 * with producers, who do not acquire any locks and only use compare-and-swap.
 *
 * An additional constraint on this function is that if the system panics
 * while this function is running, the panic code must be able to detect and
 * handle all intermediate states and correctly dequeue all errors.  The
 * errorq_panic() function below will be used for detecting and handling
 * these intermediate states.
 * The comments in errorq_drain() below explain
 * how we make sure each intermediate state is distinct and consistent.
 */
void
errorq_drain(errorq_t *eqp)
{
	errorq_elem_t *eep, *fep, *dep;

	ASSERT(eqp != NULL);
	mutex_enter(&eqp->eq_lock);

	/*
	 * If there are one or more pending errors, set eq_ptail to point to
	 * the first element on the pending list and then attempt to compare-
	 * and-swap NULL to the pending list.  We use membar_producer() to
	 * make sure that eq_ptail will be visible to errorq_panic() below
	 * before the pending list is NULLed out.  This section is labeled
	 * case (1) for errorq_panic, below.  If eq_ptail is not yet set (1A)
	 * eq_pend has all the pending errors.  If casptr fails or has not
	 * been called yet (1B), eq_pend still has all the pending errors.
	 * If casptr succeeds (1C), eq_ptail has all the pending errors.
	 */
	while ((eep = eqp->eq_pend) != NULL) {
		eqp->eq_ptail = eep;
		membar_producer();

		if (casptr(&eqp->eq_pend, eep, NULL) == eep)
			break;
	}

	/*
	 * If no errors were pending, assert that eq_ptail is set to NULL,
	 * drop the consumer lock, and return without doing anything.
	 */
	if (eep == NULL) {
		ASSERT(eqp->eq_ptail == NULL);
		mutex_exit(&eqp->eq_lock);
		return;
	}

	/*
	 * Now iterate from eq_ptail (a.k.a. eep, the newest error) to the
	 * oldest error, setting the eqe_next pointer so that we can iterate
	 * over the errors from oldest to newest.  We use membar_producer()
	 * to make sure that these stores are visible before we set eq_phead.
	 * If we panic before, during, or just after this loop (case 2),
	 * errorq_panic() will simply redo this work, as described below.
	 */
	for (eep->eqe_next = NULL; eep->eqe_prev != NULL; eep = eep->eqe_prev)
		eep->eqe_prev->eqe_next = eep;
	membar_producer();

	/*
	 * Now set eq_phead to the head of the processing list (the oldest
	 * error) and issue another membar_producer() to make sure that
	 * eq_phead is seen as non-NULL before we clear eq_ptail.  If we panic
	 * after eq_phead is set (case 3), we will detect and log these errors
	 * in errorq_panic(), as described below.
	 */
	eqp->eq_phead = eep;
	membar_producer();

	eqp->eq_ptail = NULL;
	membar_producer();

	/*
	 * If we enter from errorq_panic_drain(), we may already have
	 * errorq elements on the dump list.  Find the tail of
	 * the list ready for append.
	 */
	if (panicstr && (dep = eqp->eq_dump) != NULL) {
		while (dep->eqe_dump != NULL)
			dep = dep->eqe_dump;
	}

	/*
	 * Now iterate over the processing list from oldest (eq_phead) to
	 * newest and log each error.  Once an error is logged, we use
	 * compare-and-swap to return it to the free list.  If we panic before,
	 * during, or after calling eq_func() (case 4), the error will still be
	 * found on eq_phead and will be logged in errorq_panic below.
	 */
	while ((eep = eqp->eq_phead) != NULL) {
		eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
		eqp->eq_kstat.eqk_logged.value.ui64++;

		eqp->eq_phead = eep->eqe_next;
		membar_producer();

		eep->eqe_next = NULL;

		/*
		 * On panic, we add the element to the dump list for each
		 * nvlist errorq.  Elements are stored oldest to newest.
		 * Then continue, so we don't free and subsequently overwrite
		 * any elements which we've put on the dump queue.
		 */
		if (panicstr && (eqp->eq_flags & ERRORQ_NVLIST)) {
			if (eqp->eq_dump == NULL)
				dep = eqp->eq_dump = eep;
			else
				dep = dep->eqe_dump = eep;
			membar_producer();
			continue;
		}

		for (;;) {
			fep = eqp->eq_free;
			eep->eqe_prev = fep;
			membar_producer();

			if (casptr(&eqp->eq_free, fep, eep) == fep)
				break;
		}
	}

	mutex_exit(&eqp->eq_lock);
}

/*
 * Now that device tree services are available, set up the soft interrupt
 * handlers for any queues that were created early in boot.  We then
 * manually drain these queues to report any pending early errors.
 */
void
errorq_init(void)
{
	dev_info_t *dip = ddi_root_node();
	ddi_softintr_t id;
	errorq_t *eqp;

	ASSERT(modrootloaded != 0);
	ASSERT(dip != NULL);

	mutex_enter(&errorq_lock);

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		ddi_iblock_cookie_t ibc =
		    (ddi_iblock_cookie_t)(uintptr_t)ipltospl(eqp->eq_ipl);

		if (eqp->eq_id != NULL)
			continue; /* softint already initialized */

		if (ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id, &ibc, NULL,
		    errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
			panic("errorq_init: failed to register IPL %u softint "
			    "for queue %s", eqp->eq_ipl, eqp->eq_name);
		}

		eqp->eq_id = id;
		errorq_drain(eqp);
	}

	mutex_exit(&errorq_lock);
}

/*
 * This function is designed to be called from panic context only, and
 * therefore does not need to acquire errorq_lock when iterating over
 * errorq_list.  This function must be called no more than once for each
 * 'what' value (if you change this then review the manipulation of 'dep').
 */
static uint64_t
errorq_panic_drain(uint_t what)
{
	errorq_elem_t *eep, *nep, *fep, *dep;
	errorq_t *eqp;
	uint64_t loggedtmp;
	uint64_t logged = 0;

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		if ((eqp->eq_flags & (ERRORQ_VITAL | ERRORQ_NVLIST)) != what)
			continue; /* do not drain this queue on this pass */

		loggedtmp = eqp->eq_kstat.eqk_logged.value.ui64;

		/*
		 * In case (1B) above, eq_ptail may be set but the casptr may
		 * not have been executed yet or may have failed.  Either way,
		 * we must log errors in chronological order.  So we search
		 * the pending list for the error pointed to by eq_ptail.  If
		 * it is found, we know that all subsequent errors are also
		 * still on the pending list, so just NULL out eq_ptail and let
		 * errorq_drain(), below, take care of the logging.
		 */
		for (eep = eqp->eq_pend; eep != NULL; eep = eep->eqe_prev) {
			if (eep == eqp->eq_ptail) {
				ASSERT(eqp->eq_phead == NULL);
				eqp->eq_ptail = NULL;
				break;
			}
		}

		/*
		 * In cases (1C) and (2) above, eq_ptail will be set to the
		 * newest error on the processing list but eq_phead will still
		 * be NULL.  We set the eqe_next pointers so we can iterate
		 * over the processing list in order from oldest error to the
		 * newest error.  We then set eq_phead to point to the oldest
		 * error and fall into the for-loop below.
		 */
		if (eqp->eq_phead == NULL && (eep = eqp->eq_ptail) != NULL) {
			for (eep->eqe_next = NULL; eep->eqe_prev != NULL;
			    eep = eep->eqe_prev)
				eep->eqe_prev->eqe_next = eep;

			eqp->eq_phead = eep;
			eqp->eq_ptail = NULL;
		}

		/*
		 * In cases (3) and (4) above (or after case (1C/2) handling),
		 * eq_phead will be set to the oldest error on the processing
		 * list.  We log each error and return it to the free list.
		 *
		 * Unlike errorq_drain(), we don't need to worry about updating
		 * eq_phead because errorq_panic() will be called at most once.
		 * However, we must use casptr to update the freelist in case
		 * errors are still being enqueued during panic.
		 */
		for (eep = eqp->eq_phead; eep != NULL; eep = nep) {
			eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
			eqp->eq_kstat.eqk_logged.value.ui64++;

			nep = eep->eqe_next;
			eep->eqe_next = NULL;

			/*
			 * On panic, we add the element to the dump list for
			 * each nvlist errorq, stored oldest to newest.  Then
			 * continue, so we don't free and subsequently
			 * overwrite any elements which we've put on the dump
			 * queue.
			 */
			if (eqp->eq_flags & ERRORQ_NVLIST) {
				if (eqp->eq_dump == NULL)
					dep = eqp->eq_dump = eep;
				else
					dep = dep->eqe_dump = eep;
				membar_producer();
				continue;
			}

			for (;;) {
				fep = eqp->eq_free;
				eep->eqe_prev = fep;
				membar_producer();

				if (casptr(&eqp->eq_free, fep, eep) == fep)
					break;
			}
		}

		/*
		 * Now go ahead and drain any other errors on the pending list.
		 * This call transparently handles case (1A) above, as well as
		 * any other errors that were dispatched after errorq_drain()
		 * completed its first compare-and-swap.
		 */
		errorq_drain(eqp);

		logged += eqp->eq_kstat.eqk_logged.value.ui64 - loggedtmp;
	}
	return (logged);
}

/*
 * Drain all error queues - called only from panic context.  Some drain
 * functions may enqueue errors to ERRORQ_NVLIST error queues so that
 * they may be written out in the panic dump - so ERRORQ_NVLIST queues
 * must be drained last.  Drain ERRORQ_VITAL queues before nonvital queues
 * so that vital errors get to fill the ERRORQ_NVLIST queues first, and
 * do not drain the nonvital queues if there are many vital errors.
 */
void
errorq_panic(void)
{
	ASSERT(panicstr != NULL);

	if (errorq_panic_drain(ERRORQ_VITAL) <= errorq_vitalmin)
		(void) errorq_panic_drain(0);
	(void) errorq_panic_drain(ERRORQ_VITAL | ERRORQ_NVLIST);
	(void) errorq_panic_drain(ERRORQ_NVLIST);
}

/*
 * Reserve an error queue element for later processing and dispatching.  The
 * element is returned to the caller who may add error-specific data to the
 * element.  The element is returned to the free list either when
 * errorq_commit() is called and the element is asynchronously processed,
 * or immediately when errorq_cancel() is called.
 */
errorq_elem_t *
errorq_reserve(errorq_t *eqp)
{
	errorq_elem_t *eqep;

	if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&errorq_lost, 1);
		return (NULL);
	}

	while ((eqep = eqp->eq_free) != NULL) {
		if (casptr(&eqp->eq_free, eqep, eqep->eqe_prev) == eqep)
			break;
	}

	if (eqep == NULL) {
		atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
		return (NULL);
	}

	if (eqp->eq_flags & ERRORQ_NVLIST) {
		errorq_nvelem_t *eqnp = eqep->eqe_data;
		nv_alloc_reset(eqnp->eqn_nva);
		eqnp->eqn_nvl = fm_nvlist_create(eqnp->eqn_nva);
	}

	atomic_add_64(&eqp->eq_kstat.eqk_reserved.value.ui64, 1);
	return (eqep);
}

/*
 * Commit an errorq element (eqep) for dispatching.
 * This function may be called from any context subject
 * to the Platform Considerations described above.
 */
void
errorq_commit(errorq_t *eqp, errorq_elem_t *eqep, uint_t flag)
{
	errorq_elem_t *old;

	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64, 1);
		return;
	}

	for (;;) {
		old = eqp->eq_pend;
		eqep->eqe_prev = old;
		membar_producer();

		if (casptr(&eqp->eq_pend, old, eqep) == old)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_committed.value.ui64, 1);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}

/*
 * Cancel an errorq element reservation by returning the specified element
 * to the free list.  Duplicate or invalid frees are not supported.
 */
void
errorq_cancel(errorq_t *eqp, errorq_elem_t *eqep)
{
	errorq_elem_t *fep;

	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE))
		return;

	for (;;) {
		fep = eqp->eq_free;
		eqep->eqe_prev = fep;
		membar_producer();

		if (casptr(&eqp->eq_free, fep, eqep) == fep)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_cancelled.value.ui64, 1);
}

/*
 * Write elements on the dump list of each nvlist errorq to the dump device.
 * Upon reboot, fmd(1M) will extract and replay them for diagnosis.
 */
void
errorq_dump(void)
{
	errorq_elem_t *eep;
	errorq_t *eqp;

	if (ereport_dumpbuf == NULL)
		return; /* reboot or panic before errorq is even set up */

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		if (!(eqp->eq_flags & ERRORQ_NVLIST) ||
		    !(eqp->eq_flags & ERRORQ_ACTIVE))
			continue; /* do not dump this queue on panic */

		for (eep = eqp->eq_dump; eep != NULL; eep = eep->eqe_dump) {
			errorq_nvelem_t *eqnp = eep->eqe_data;
			size_t len = 0;
			erpt_dump_t ed;
			int err;

			(void) nvlist_size(eqnp->eqn_nvl,
			    &len, NV_ENCODE_NATIVE);

			if (len > ereport_dumplen || len == 0) {
				cmn_err(CE_WARN, "%s: unable to save error "
				    "report %p due to size %lu\n",
				    eqp->eq_name, (void *)eep, len);
				continue;
			}

			if ((err = nvlist_pack(eqnp->eqn_nvl,
			    (char **)&ereport_dumpbuf, &ereport_dumplen,
			    NV_ENCODE_NATIVE, KM_NOSLEEP)) != 0) {
				cmn_err(CE_WARN, "%s: unable to save error "
				    "report %p due to pack error %d\n",
				    eqp->eq_name, (void *)eep, err);
				continue;
			}

			ed.ed_magic = ERPT_MAGIC;
			ed.ed_chksum = checksum32(ereport_dumpbuf, len);
			ed.ed_size = (uint32_t)len;
			ed.ed_pad = 0;
			ed.ed_hrt_nsec = 0;
			ed.ed_hrt_base = panic_hrtime;
			ed.ed_tod_base.sec = panic_hrestime.tv_sec;
			ed.ed_tod_base.nsec = panic_hrestime.tv_nsec;

			dumpvp_write(&ed, sizeof (ed));
			dumpvp_write(ereport_dumpbuf, len);
		}
	}
}

nvlist_t *
errorq_elem_nvl(errorq_t *eqp, const errorq_elem_t *eqep)
{
	errorq_nvelem_t *eqnp = eqep->eqe_data;

	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);

	return (eqnp->eqn_nvl);
}

nv_alloc_t *
errorq_elem_nva(errorq_t *eqp, const errorq_elem_t *eqep)
{
	errorq_nvelem_t *eqnp = eqep->eqe_data;

	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);

	return (eqnp->eqn_nva);
}

/*
 * Reserve a new element and duplicate the data of the original into it.
 */
void *
errorq_elem_dup(errorq_t *eqp, const errorq_elem_t *eqep, errorq_elem_t **neqep)
{
	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE);
	ASSERT(!(eqp->eq_flags & ERRORQ_NVLIST));

	if ((*neqep = errorq_reserve(eqp)) == NULL)
		return (NULL);

	bcopy(eqep->eqe_data, (*neqep)->eqe_data, eqp->eq_size);
	return ((*neqep)->eqe_data);
}