/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Kernel Error Queues
 *
 * A common problem when handling hardware error traps and interrupts is that
 * these errors frequently must be handled at high interrupt level, where
 * reliably producing error messages and safely examining and manipulating
 * other kernel state may not be possible. The kernel error queue primitive is
 * a common set of routines that allow a subsystem to maintain a queue of
 * errors that can be processed by an explicit call from a safe context or by a
 * soft interrupt that fires at a specific lower interrupt level. The queue
 * management code also ensures that if the system panics, all in-transit
 * errors are logged prior to reset. Each queue has an associated kstat for
 * observing the number of errors dispatched and logged, and mdb(1) debugging
 * support is provided for live and post-mortem observability.
 *
 * Memory Allocation
 *
 * All of the queue data structures are allocated in advance as part of
 * the errorq_create() call. No additional memory allocations are
 * performed as part of errorq_dispatch(), errorq_reserve(),
 * errorq_commit() or errorq_drain(). This design facilitates reliable
 * error queue processing even when the system is low on memory, and
 * ensures that errorq_dispatch() can be called from any context. When
 * the queue is created, the maximum queue length is specified as a
 * parameter to errorq_create() or errorq_nvcreate(). This length should
 * represent a reasonable upper bound on the number of simultaneous
 * errors. If errorq_dispatch() or errorq_reserve() is invoked and no
 * free queue elements are available, the error is dropped and will not
 * be logged. Typically, the queue will only be exhausted by an error
 * storm, and in this case the earlier errors provide the most important
 * data for analysis. When a new error is dispatched, the error data is
 * copied into the preallocated queue element so that the caller's buffer
 * can be reused.
 *
 * When a new error is reserved, an element is moved from the free list
 * and returned to the caller. The element buffer data, eqe_data, may be
 * managed by the caller and dispatched to the errorq by calling
 * errorq_commit(). This is useful for additions to errorq's
 * created with errorq_nvcreate() to handle name-value pair (nvpair) data.
 * See below for a discussion on nvlist errorq's.
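 *
 * As a minimal usage sketch of the pattern described above (the "foo"
 * subsystem, its error structure, and the queue parameters shown here are
 * hypothetical and exist only for illustration):
 *
 *	static errorq_t *foo_errorq;
 *
 *	static void
 *	foo_drain(void *private, const void *data, const errorq_elem_t *eqep)
 *	{
 *		cmn_err(CE_WARN, "foo: hardware error detected");
 *	}
 *
 *	void
 *	foo_attach(void)
 *	{
 *		foo_errorq = errorq_create("foo_errors", foo_drain, NULL,
 *		    64, sizeof (struct foo_error), 2, ERRORQ_VITAL);
 *	}
 *
 *	void
 *	foo_error_handler(struct foo_error *fep)
 *	{
 *		errorq_dispatch(foo_errorq, fep, sizeof (*fep), ERRORQ_ASYNC);
 *	}
 *
 * Because all element buffers are preallocated by errorq_create(), the
 * dispatch simply copies *fep into a free element; the handler's buffer
 * may be reused as soon as errorq_dispatch() returns.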
 *
 * Queue Drain Callback
 *
 * When the error queue is drained, the caller's queue drain callback is
 * invoked with a pointer to the saved error data. This function may be
 * called from passive kernel context or soft interrupt context at or
 * below LOCK_LEVEL, or as part of panic(). As such, the callback should
 * basically only be calling cmn_err (but NOT with the CE_PANIC flag).
 * The callback must not call panic(), attempt to allocate memory, or wait
 * on a condition variable. The callback may not call errorq_destroy()
 * or errorq_drain() on the same error queue that called it.
 *
 * The queue drain callback will always be called for each pending error
 * in the order in which errors were enqueued (oldest to newest). The
 * queue drain callback is guaranteed to provide at *least* once semantics
 * for all errors that are successfully dispatched (i.e. for which
 * errorq_dispatch() has successfully completed). If an unrelated panic
 * occurs while the queue drain callback is running on a vital queue, the
 * panic subsystem will continue the queue drain and the callback may be
 * invoked again for the same error. Therefore, the callback should
 * restrict itself to logging messages and taking other actions that are
 * not destructive if repeated.
 *
 * Name-Value Pair Error Queues
 *
 * During error handling, it may be more convenient to store error
 * queue element data as a fixed buffer of name-value pairs. The
 * nvpair library allows construction and destruction of nvlists in
 * pre-allocated memory buffers.
 *
 * Error queues created via errorq_nvcreate() store queue element
 * data as fixed buffer nvlists (ereports). errorq_reserve()
 * allocates an errorq element from eqp->eq_free and returns a valid
 * pointer to an errorq_elem_t (queue element) and a pre-allocated
 * fixed buffer nvlist. errorq_elem_nvl() is used to gain access
 * to the nvlist to add name-value ereport members prior to
 * dispatching the error queue element in errorq_commit().
 *
 * Once dispatched, the drain function will return the element to
 * eqp->eq_free and reset the associated nv_alloc structure.
 * errorq_cancel() may be called to cancel the reservation of an
 * element that was never dispatched (committed). This is useful in
 * cases where a programming error prevents a queue element from being
 * dispatched.
 *
 * Queue Management
 *
 * The queue element structures and error data buffers are allocated in
 * two contiguous chunks as part of errorq_create() or errorq_nvcreate().
 * Each queue element structure contains a next pointer,
 * a previous pointer, and a pointer to the corresponding error data
 * buffer. The data buffer for a nvlist errorq is a shared buffer
 * for the allocation of name-value pair lists. The elements are kept on
 * one of three lists:
 *
 * Unused elements are kept on the free list, a singly-linked list pointed
 * to by eqp->eq_free, and linked together using eqe_prev. The eqe_next
 * pointer is not used by the free list and will be set to NULL.
 *
 * Pending errors are kept on the pending list, a singly-linked list
 * pointed to by eqp->eq_pend, and linked together using eqe_prev. This
 * list is maintained in order from newest error to oldest. The eqe_next
 * pointer is not used by the pending list and will be set to NULL.
 *
 * The processing list is a doubly-linked list pointed to by eqp->eq_phead
 * (the oldest element) and eqp->eq_ptail (the newest element). The
 * eqe_next pointer is used to traverse from eq_phead to eq_ptail, and the
 * eqe_prev pointer is used to traverse from eq_ptail to eq_phead. Once a
 * queue drain operation begins, the current pending list is moved to the
 * processing list in a two-phase commit fashion, allowing the panic code
 * to always locate and process all pending errors in the event that a
 * panic occurs in the middle of queue processing.
 *
 * A fourth list is maintained for nvlist errorqs. The dump list,
 * eq_dump, is used to link all errorq elements that should be stored
 * in a crash dump file in the event of a system panic. During
 * errorq_panic(), the list is created and subsequently traversed
 * in errorq_dump() during the final phases of a crash dump.
 *
 * Platform Considerations
 *
 * In order to simplify their implementation, error queues make use of the
 * C wrappers for compare-and-swap. If the platform itself does not
 * support compare-and-swap in hardware and the kernel emulation routines
 * are used instead, then the context in which errorq_dispatch() can be
 * safely invoked is further constrained by the implementation of the
 * compare-and-swap emulation. Specifically, if errorq_dispatch() is
 * called from a code path that can be executed above ATOMIC_LEVEL on such
 * a platform, the dispatch code could potentially deadlock unless the
 * corresponding error interrupt is blocked or disabled prior to calling
 * errorq_dispatch(). Error queues should therefore be deployed with
 * caution on these platforms.
 *
 * Interfaces
 *
 * errorq_t *errorq_create(name, func, private, qlen, eltsize, ipl, flags);
 * errorq_t *errorq_nvcreate(name, func, private, qlen, eltsize, ipl, flags);
 *
 * Create a new error queue with the specified name, callback, and
 * properties. A pointer to the new error queue is returned upon success,
 * or NULL is returned to indicate that the queue could not be created.
 * This function must be called from passive kernel context with no locks
 * held that can prevent a sleeping memory allocation from occurring.
 * errorq_create() will return failure if the queue kstats cannot be
 * created, or if a soft interrupt handler cannot be registered.
 *
 * The queue 'name' is a string that is recorded for live and post-mortem
 * examination by a debugger. The queue callback 'func' will be invoked
 * for each error drained from the queue, and will receive the 'private'
 * pointer as its first argument. The callback must obey the rules for
 * callbacks described above. The queue will have maximum length 'qlen'
 * and each element will be able to record up to 'eltsize' bytes of data.
 * The queue's soft interrupt (see errorq_dispatch(), below) will fire
 * at 'ipl', which should not exceed LOCK_LEVEL. The queue 'flags' may
 * include the following flag:
 *
 * ERRORQ_VITAL - This queue contains information that is considered
 * vital to problem diagnosis. Error queues that are marked vital will
 * be automatically drained by the panic subsystem prior to printing
 * the panic messages to the console.
 *
 * void errorq_destroy(errorq);
 *
 * Destroy the specified error queue.
 * The queue is drained of any pending elements and these are logged
 * before errorq_destroy() returns. Once errorq_destroy() begins draining
 * the queue, any simultaneous calls to dispatch errors will result in the
 * errors being dropped. The caller must invoke a higher-level abstraction
 * (e.g. disabling an error interrupt) to ensure that error handling code
 * does not attempt to dispatch errors to the queue while it is being freed.
 *
 * void errorq_dispatch(errorq, data, len, flag);
 *
 * Attempt to enqueue the specified error data. If a free queue element
 * is available, the data is copied into a free element and placed on a
 * pending list. If no free queue element is available, the error is
 * dropped. The data length (len) is specified in bytes and should not
 * exceed the queue's maximum element size. If the data length is less
 * than the maximum element size, the remainder of the queue element is
 * filled with zeroes. The flag parameter should be one of:
 *
 * ERRORQ_ASYNC - Schedule a soft interrupt at the previously specified
 * IPL to asynchronously drain the queue on behalf of the caller.
 *
 * ERRORQ_SYNC - Do not schedule a soft interrupt to drain the queue.
 * The caller is presumed to be calling errorq_drain() or panic() in
 * the near future in order to drain the queue and log the error.
 *
 * The errorq_dispatch() function may be called from any context, subject
 * to the Platform Considerations described above.
 *
 * void errorq_drain(errorq);
 *
 * Drain the error queue of all pending errors. The queue's callback
 * function is invoked for each error in order from oldest to newest.
 * This function may be used at or below LOCK_LEVEL or from panic context.
 *
 * errorq_elem_t *errorq_reserve(errorq);
 *
 * Reserve an error queue element for later processing and dispatching.
 * The element is returned to the caller, who may add error-specific data
 * to the element. The element is returned to the free list either after
 * errorq_commit() is called and the element has been processed by the
 * queue's drain function, or immediately when errorq_cancel() is called.
 *
 * void errorq_commit(errorq, errorq_elem, flag);
 *
 * Commit an errorq element (eqep) for dispatching; see
 * errorq_dispatch().
 *
 * void errorq_cancel(errorq, errorq_elem);
 *
 * Cancel a pending errorq element reservation. The errorq element is
 * returned to the free list upon cancellation.
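 *
 * As a rough sketch of the reserve/commit flow on a queue created with
 * errorq_nvcreate() (the queue pointer, ereport class string, and payload
 * members below are hypothetical, shown only to illustrate the calling
 * pattern):
 *
 *	errorq_elem_t *eqep;
 *	nvlist_t *nvl;
 *
 *	if ((eqep = errorq_reserve(foo_nv_errorq)) == NULL)
 *		return;
 *
 *	nvl = errorq_elem_nvl(foo_nv_errorq, eqep);
 *	(void) nvlist_add_string(nvl, "class", "ereport.foo.bus");
 *	(void) nvlist_add_uint64(nvl, "addr", afar);
 *
 *	errorq_commit(foo_nv_errorq, eqep, ERRORQ_ASYNC);
 *
 * If a problem discovered after the reservation prevents the element from
 * being dispatched, errorq_cancel(foo_nv_errorq, eqep) returns it to the
 * free list instead.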
 */

#include <sys/errorq_impl.h>
#include <sys/sysmacros.h>
#include <sys/machlock.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/bootconf.h>
#include <sys/spl.h>
#include <sys/dumphdr.h>
#include <sys/compress.h>
#include <sys/time.h>
#include <sys/panic.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>

static struct errorq_kstat errorq_kstat_template = {
	{ "dispatched", KSTAT_DATA_UINT64 },
	{ "dropped", KSTAT_DATA_UINT64 },
	{ "logged", KSTAT_DATA_UINT64 },
	{ "reserved", KSTAT_DATA_UINT64 },
	{ "reserve_fail", KSTAT_DATA_UINT64 },
	{ "committed", KSTAT_DATA_UINT64 },
	{ "commit_fail", KSTAT_DATA_UINT64 },
	{ "cancelled", KSTAT_DATA_UINT64 }
};

static uint64_t errorq_lost = 0;
static errorq_t *errorq_list = NULL;
static kmutex_t errorq_lock;
static uint64_t errorq_vitalmin = 5;

static uint_t
errorq_intr(caddr_t eqp)
{
	errorq_drain((errorq_t *)eqp);
	return (DDI_INTR_CLAIMED);
}

/*
 * Create a new error queue with the specified properties and add a software
 * interrupt handler and kstat for it. This function must be called from
 * passive kernel context with no locks held that can prevent a sleeping
 * memory allocation from occurring. This function will return NULL if the
 * softint or kstat for this queue cannot be created.
 */
errorq_t *
errorq_create(const char *name, errorq_func_t func, void *private,
    ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
{
	errorq_t *eqp = kmem_alloc(sizeof (errorq_t), KM_SLEEP);
	ddi_iblock_cookie_t ibc = (ddi_iblock_cookie_t)(uintptr_t)ipltospl(ipl);
	dev_info_t *dip = ddi_root_node();

	errorq_elem_t *eep;
	ddi_softintr_t id = NULL;
	caddr_t data;

	ASSERT(qlen != 0 && size != 0);
	ASSERT(ipl > 0 && ipl <= LOCK_LEVEL);

	/*
	 * If a queue is created very early in boot before device tree services
	 * are available, the queue softint handler cannot be created. We
	 * manually drain these queues and create their softint handlers when
	 * it is safe to do so as part of errorq_init(), below.
	 */
	if (modrootloaded && ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id,
	    &ibc, NULL, errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "errorq_create: failed to register "
		    "IPL %u softint for queue %s", ipl, name);
		kmem_free(eqp, sizeof (errorq_t));
		return (NULL);
	}

	if ((eqp->eq_ksp = kstat_create("unix", 0, (char *)name, "errorq",
	    KSTAT_TYPE_NAMED, sizeof (struct errorq_kstat) /
	    sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL)) == NULL) {
		cmn_err(CE_WARN, "errorq_create: failed to create kstat "
		    "for queue %s", name);
		if (id != NULL)
			ddi_remove_softintr(id);
		kmem_free(eqp, sizeof (errorq_t));
		return (NULL);
	}

	bcopy(&errorq_kstat_template, &eqp->eq_kstat,
	    sizeof (struct errorq_kstat));
	eqp->eq_ksp->ks_data = &eqp->eq_kstat;
	eqp->eq_ksp->ks_private = eqp;
	kstat_install(eqp->eq_ksp);

	(void) strncpy(eqp->eq_name, name, ERRORQ_NAMELEN);
	eqp->eq_name[ERRORQ_NAMELEN] = '\0';
	eqp->eq_func = func;
	eqp->eq_private = private;
	eqp->eq_data = kmem_alloc(qlen * size, KM_SLEEP);
	eqp->eq_qlen = qlen;
	eqp->eq_size = size;
	eqp->eq_ipl = ipl;
	eqp->eq_flags = flags | ERRORQ_ACTIVE;
	eqp->eq_id = id;
	mutex_init(&eqp->eq_lock, NULL, MUTEX_DEFAULT, NULL);
	eqp->eq_elems = kmem_alloc(qlen * sizeof (errorq_elem_t), KM_SLEEP);
	eqp->eq_phead = NULL;
	eqp->eq_ptail = NULL;
	eqp->eq_pend = NULL;
	eqp->eq_dump = NULL;
	eqp->eq_free = eqp->eq_elems;

	/*
	 * Iterate over the array of errorq_elem_t structures and place each
	 * one on the free list and set its data pointer.
	 */
	for (eep = eqp->eq_free, data = eqp->eq_data; qlen > 1; qlen--) {
		eep->eqe_next = NULL;
		eep->eqe_dump = NULL;
		eep->eqe_prev = eep + 1;
		eep->eqe_data = data;
		data += size;
		eep++;
	}

	eep->eqe_next = NULL;
	eep->eqe_prev = NULL;
	eep->eqe_data = data;
	eep->eqe_dump = NULL;

	/*
	 * Once the errorq is initialized, add it to the global list of queues,
	 * and then return a pointer to the new queue to the caller.
	 */
	mutex_enter(&errorq_lock);
	eqp->eq_next = errorq_list;
	errorq_list = eqp;
	mutex_exit(&errorq_lock);

	return (eqp);
}

/*
 * Create a new errorq as if by errorq_create(), but set the ERRORQ_NVLIST
 * flag and initialize each element to have the start of its data region used
 * as an errorq_nvelem_t with a nvlist allocator that consumes the data region.
 */
errorq_t *
errorq_nvcreate(const char *name, errorq_func_t func, void *private,
    ulong_t qlen, size_t size, uint_t ipl, uint_t flags)
{
	errorq_t *eqp;
	errorq_elem_t *eep;

	eqp = errorq_create(name, func, private, qlen,
	    size + sizeof (errorq_nvelem_t), ipl, flags | ERRORQ_NVLIST);

	if (eqp == NULL)
		return (NULL);

	mutex_enter(&eqp->eq_lock);

	for (eep = eqp->eq_elems; qlen != 0; eep++, qlen--) {
		errorq_nvelem_t *eqnp = eep->eqe_data;
		eqnp->eqn_buf = (char *)eqnp + sizeof (errorq_nvelem_t);
		eqnp->eqn_nva = fm_nva_xcreate(eqnp->eqn_buf, size);
	}

	mutex_exit(&eqp->eq_lock);
	return (eqp);
}

/*
 * To destroy an error queue, we mark it as disabled and then explicitly drain
 * all pending errors. Once the drain is complete, we can remove the queue
 * from the global list of queues examined by errorq_panic(), and then free
 * the various queue data structures.
 * The caller must use some higher-level abstraction (e.g. disabling an
 * error interrupt) to ensure that no one will attempt to enqueue new
 * errors while we are freeing this queue.
 */
void
errorq_destroy(errorq_t *eqp)
{
	errorq_t *p, **pp;
	errorq_elem_t *eep;
	ulong_t i;

	ASSERT(eqp != NULL);
	eqp->eq_flags &= ~ERRORQ_ACTIVE;
	errorq_drain(eqp);

	mutex_enter(&errorq_lock);
	pp = &errorq_list;

	for (p = errorq_list; p != NULL; p = p->eq_next) {
		if (p == eqp) {
			*pp = p->eq_next;
			break;
		}
		pp = &p->eq_next;
	}

	mutex_exit(&errorq_lock);
	ASSERT(p != NULL);

	if (eqp->eq_flags & ERRORQ_NVLIST) {
		for (eep = eqp->eq_elems, i = 0; i < eqp->eq_qlen; i++, eep++) {
			errorq_nvelem_t *eqnp = eep->eqe_data;
			fm_nva_xdestroy(eqnp->eqn_nva);
		}
	}

	mutex_destroy(&eqp->eq_lock);
	kstat_delete(eqp->eq_ksp);

	if (eqp->eq_id != NULL)
		ddi_remove_softintr(eqp->eq_id);

	kmem_free(eqp->eq_elems, eqp->eq_qlen * sizeof (errorq_elem_t));
	kmem_free(eqp->eq_data, eqp->eq_qlen * eqp->eq_size);

	kmem_free(eqp, sizeof (errorq_t));
}

/*
 * Dispatch a new error into the queue for later processing. The specified
 * data buffer is copied into a preallocated queue element. If 'len' is
 * smaller than the queue element size, the remainder of the queue element is
 * filled with zeroes. This function may be called from any context subject
 * to the Platform Considerations described above.
 */
void
errorq_dispatch(errorq_t *eqp, const void *data, size_t len, uint_t flag)
{
	errorq_elem_t *eep, *old;

	if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&errorq_lost, 1);
		return; /* drop error if queue is uninitialized or disabled */
	}

	while ((eep = eqp->eq_free) != NULL) {
		if (casptr(&eqp->eq_free, eep, eep->eqe_prev) == eep)
			break;
	}

	if (eep == NULL) {
		atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
		return;
	}

	ASSERT(len <= eqp->eq_size);
	bcopy(data, eep->eqe_data, MIN(eqp->eq_size, len));

	if (len < eqp->eq_size)
		bzero((caddr_t)eep->eqe_data + len, eqp->eq_size - len);

	for (;;) {
		old = eqp->eq_pend;
		eep->eqe_prev = old;
		membar_producer();

		if (casptr(&eqp->eq_pend, old, eep) == old)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_dispatched.value.ui64, 1);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}

/*
 * Drain the specified error queue by calling eq_func() for each pending error.
 * This function must be called at or below LOCK_LEVEL or from panic context.
 * In order to synchronize with other attempts to drain the queue, we acquire
 * the adaptive eq_lock, blocking other consumers. Once this lock is held,
 * we must use compare-and-swap to move the pending list to the processing
 * list and to return elements to the free list in order to synchronize
 * with producers, who do not acquire any locks and only use compare-and-swap.
 *
 * An additional constraint on this function is that if the system panics
 * while this function is running, the panic code must be able to detect and
 * handle all intermediate states and correctly dequeue all errors. The
 * errorq_panic() function below will be used for detecting and handling
 * these intermediate states.
 * The comments in errorq_drain() below explain how we make sure each
 * intermediate state is distinct and consistent.
 */
void
errorq_drain(errorq_t *eqp)
{
	errorq_elem_t *eep, *fep, *dep;

	ASSERT(eqp != NULL);
	mutex_enter(&eqp->eq_lock);

	/*
	 * If there are one or more pending errors, set eq_ptail to point to
	 * the first element on the pending list and then attempt to compare-
	 * and-swap NULL to the pending list. We use membar_producer() to
	 * make sure that eq_ptail will be visible to errorq_panic() below
	 * before the pending list is NULLed out. This section is labeled
	 * case (1) for errorq_panic, below. If eq_ptail is not yet set (1A)
	 * eq_pend has all the pending errors. If casptr fails or has not
	 * been called yet (1B), eq_pend still has all the pending errors.
	 * If casptr succeeds (1C), eq_ptail has all the pending errors.
	 */
	while ((eep = eqp->eq_pend) != NULL) {
		eqp->eq_ptail = eep;
		membar_producer();

		if (casptr(&eqp->eq_pend, eep, NULL) == eep)
			break;
	}

	/*
	 * If no errors were pending, assert that eq_ptail is set to NULL,
	 * drop the consumer lock, and return without doing anything.
	 */
	if (eep == NULL) {
		ASSERT(eqp->eq_ptail == NULL);
		mutex_exit(&eqp->eq_lock);
		return;
	}

	/*
	 * Now iterate from eq_ptail (a.k.a. eep, the newest error) to the
	 * oldest error, setting the eqe_next pointer so that we can iterate
	 * over the errors from oldest to newest. We use membar_producer()
	 * to make sure that these stores are visible before we set eq_phead.
	 * If we panic before, during, or just after this loop (case 2),
	 * errorq_panic() will simply redo this work, as described below.
	 */
	for (eep->eqe_next = NULL; eep->eqe_prev != NULL; eep = eep->eqe_prev)
		eep->eqe_prev->eqe_next = eep;
	membar_producer();

	/*
	 * Now set eq_phead to the head of the processing list (the oldest
	 * error) and issue another membar_producer() to make sure that
	 * eq_phead is seen as non-NULL before we clear eq_ptail. If we panic
	 * after eq_phead is set (case 3), we will detect and log these errors
	 * in errorq_panic(), as described below.
	 */
	eqp->eq_phead = eep;
	membar_producer();

	eqp->eq_ptail = NULL;
	membar_producer();

	/*
	 * If we enter from errorq_panic_drain(), we may already have
	 * errorq elements on the dump list. Find the tail of
	 * the list ready for append.
	 */
	if (panicstr && (dep = eqp->eq_dump) != NULL) {
		while (dep->eqe_dump != NULL)
			dep = dep->eqe_dump;
	}

	/*
	 * Now iterate over the processing list from oldest (eq_phead) to
	 * newest and log each error. Once an error is logged, we use
	 * compare-and-swap to return it to the free list. If we panic before,
	 * during, or after calling eq_func() (case 4), the error will still be
	 * found on eq_phead and will be logged in errorq_panic below.
	 */
	while ((eep = eqp->eq_phead) != NULL) {
		eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
		eqp->eq_kstat.eqk_logged.value.ui64++;

		eqp->eq_phead = eep->eqe_next;
		membar_producer();

		eep->eqe_next = NULL;

		for (;;) {
			fep = eqp->eq_free;
			eep->eqe_prev = fep;
			membar_producer();

			if (casptr(&eqp->eq_free, fep, eep) == fep)
				break;
		}

		/*
		 * On panic, we add the element to the dump list for each
		 * nvlist errorq. Elements are stored oldest to newest.
		 */
		if (panicstr && (eqp->eq_flags & ERRORQ_NVLIST)) {
			if (eqp->eq_dump == NULL)
				dep = eqp->eq_dump = eep;
			else
				dep = dep->eqe_dump = eep;
			membar_producer();
		}
	}

	mutex_exit(&eqp->eq_lock);
}

/*
 * Now that device tree services are available, set up the soft interrupt
 * handlers for any queues that were created early in boot. We then
 * manually drain these queues to report any pending early errors.
 */
void
errorq_init(void)
{
	dev_info_t *dip = ddi_root_node();
	ddi_softintr_t id;
	errorq_t *eqp;

	ASSERT(modrootloaded != 0);
	ASSERT(dip != NULL);

	mutex_enter(&errorq_lock);

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		ddi_iblock_cookie_t ibc =
		    (ddi_iblock_cookie_t)(uintptr_t)ipltospl(eqp->eq_ipl);

		if (eqp->eq_id != NULL)
			continue; /* softint already initialized */

		if (ddi_add_softintr(dip, DDI_SOFTINT_FIXED, &id, &ibc, NULL,
		    errorq_intr, (caddr_t)eqp) != DDI_SUCCESS) {
			panic("errorq_init: failed to register IPL %u softint "
			    "for queue %s", eqp->eq_ipl, eqp->eq_name);
		}

		eqp->eq_id = id;
		errorq_drain(eqp);
	}

	mutex_exit(&errorq_lock);
}

/*
 * This function is designed to be called from panic context only, and
 * therefore does not need to acquire errorq_lock when iterating over
 * errorq_list. This function must be called no more than once for each
 * 'what' value (if you change this then review the manipulation of 'dep').
 */
static uint64_t
errorq_panic_drain(uint_t what)
{
	errorq_elem_t *eep, *nep, *fep, *dep;
	errorq_t *eqp;
	uint64_t loggedtmp;
	uint64_t logged = 0;

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		if ((eqp->eq_flags & (ERRORQ_VITAL | ERRORQ_NVLIST)) != what)
			continue; /* do not drain this queue on this pass */

		loggedtmp = eqp->eq_kstat.eqk_logged.value.ui64;

		/*
		 * In case (1B) above, eq_ptail may be set but the casptr may
		 * not have been executed yet or may have failed. Either way,
		 * we must log errors in chronological order. So we search
		 * the pending list for the error pointed to by eq_ptail. If
		 * it is found, we know that all subsequent errors are also
		 * still on the pending list, so just NULL out eq_ptail and let
		 * errorq_drain(), below, take care of the logging.
		 */
		for (eep = eqp->eq_pend; eep != NULL; eep = eep->eqe_prev) {
			if (eep == eqp->eq_ptail) {
				ASSERT(eqp->eq_phead == NULL);
				eqp->eq_ptail = NULL;
				break;
			}
		}

		/*
		 * In cases (1C) and (2) above, eq_ptail will be set to the
		 * newest error on the processing list but eq_phead will still
		 * be NULL. We set the eqe_next pointers so we can iterate
		 * over the processing list in order from oldest error to the
		 * newest error. We then set eq_phead to point to the oldest
		 * error and fall into the for-loop below.
		 */
		if (eqp->eq_phead == NULL && (eep = eqp->eq_ptail) != NULL) {
			for (eep->eqe_next = NULL; eep->eqe_prev != NULL;
			    eep = eep->eqe_prev)
				eep->eqe_prev->eqe_next = eep;

			eqp->eq_phead = eep;
			eqp->eq_ptail = NULL;
		}

		/*
		 * In cases (3) and (4) above (or after case (1C/2) handling),
		 * eq_phead will be set to the oldest error on the processing
		 * list. We log each error and return it to the free list.
		 *
		 * Unlike errorq_drain(), we don't need to worry about updating
		 * eq_phead because errorq_panic() will be called at most once.
		 * However, we must use casptr to update the freelist in case
		 * errors are still being enqueued during panic.
		 */
		for (eep = eqp->eq_phead; eep != NULL; eep = nep) {
			eqp->eq_func(eqp->eq_private, eep->eqe_data, eep);
			eqp->eq_kstat.eqk_logged.value.ui64++;

			nep = eep->eqe_next;
			eep->eqe_next = NULL;

			for (;;) {
				fep = eqp->eq_free;
				eep->eqe_prev = fep;
				membar_producer();

				if (casptr(&eqp->eq_free, fep, eep) == fep)
					break;
			}

			/*
			 * On panic, we add the element to the dump list for
			 * each nvlist errorq, stored oldest to newest.
			 */
			if (eqp->eq_flags & ERRORQ_NVLIST) {
				if (eqp->eq_dump == NULL)
					dep = eqp->eq_dump = eep;
				else
					dep = dep->eqe_dump = eep;
				membar_producer();
			}
		}

		/*
		 * Now go ahead and drain any other errors on the pending list.
		 * This call transparently handles case (1A) above, as well as
		 * any other errors that were dispatched after errorq_drain()
		 * completed its first compare-and-swap.
		 */
		errorq_drain(eqp);

		logged += eqp->eq_kstat.eqk_logged.value.ui64 - loggedtmp;
	}
	return (logged);
}

/*
 * Drain all error queues - called only from panic context. Some drain
 * functions may enqueue errors to ERRORQ_NVLIST error queues so that
 * they may be written out in the panic dump - so ERRORQ_NVLIST queues
 * must be drained last. Drain ERRORQ_VITAL queues before nonvital queues
 * so that vital errors get to fill the ERRORQ_NVLIST queues first, and
 * do not drain the nonvital queues if there are many vital errors.
 */
void
errorq_panic(void)
{
	ASSERT(panicstr != NULL);

	if (errorq_panic_drain(ERRORQ_VITAL) <= errorq_vitalmin)
		(void) errorq_panic_drain(0);
	(void) errorq_panic_drain(ERRORQ_VITAL | ERRORQ_NVLIST);
	(void) errorq_panic_drain(ERRORQ_NVLIST);
}

/*
 * Reserve an error queue element for later processing and dispatching. The
 * element is returned to the caller, who may add error-specific data to the
 * element. The element is returned to the free list either after
 * errorq_commit() is called and the element has been processed by the
 * queue's drain function, or immediately when errorq_cancel() is called.
 */
errorq_elem_t *
errorq_reserve(errorq_t *eqp)
{
	errorq_elem_t *eqep;

	if (eqp == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&errorq_lost, 1);
		return (NULL);
	}

	while ((eqep = eqp->eq_free) != NULL) {
		if (casptr(&eqp->eq_free, eqep, eqep->eqe_prev) == eqep)
			break;
	}

	if (eqep == NULL) {
		atomic_add_64(&eqp->eq_kstat.eqk_dropped.value.ui64, 1);
		return (NULL);
	}

	if (eqp->eq_flags & ERRORQ_NVLIST) {
		errorq_nvelem_t *eqnp = eqep->eqe_data;
		nv_alloc_reset(eqnp->eqn_nva);
		eqnp->eqn_nvl = fm_nvlist_create(eqnp->eqn_nva);
	}

	atomic_add_64(&eqp->eq_kstat.eqk_reserved.value.ui64, 1);
	return (eqep);
}

/*
 * Commit an errorq element (eqep) for dispatching.
 * This function may be called from any context subject
 * to the Platform Considerations described above.
 */
void
errorq_commit(errorq_t *eqp, errorq_elem_t *eqep, uint_t flag)
{
	errorq_elem_t *old;

	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE)) {
		atomic_add_64(&eqp->eq_kstat.eqk_commit_fail.value.ui64, 1);
		return;
	}

	for (;;) {
		old = eqp->eq_pend;
		eqep->eqe_prev = old;
		membar_producer();

		if (casptr(&eqp->eq_pend, old, eqep) == old)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_committed.value.ui64, 1);

	if (flag == ERRORQ_ASYNC && eqp->eq_id != NULL)
		ddi_trigger_softintr(eqp->eq_id);
}

/*
 * Cancel an errorq element reservation by returning the specified element
 * to the free list. Duplicate or invalid frees are not supported.
 */
void
errorq_cancel(errorq_t *eqp, errorq_elem_t *eqep)
{
	errorq_elem_t *fep;

	if (eqep == NULL || !(eqp->eq_flags & ERRORQ_ACTIVE))
		return;

	for (;;) {
		fep = eqp->eq_free;
		eqep->eqe_prev = fep;
		membar_producer();

		if (casptr(&eqp->eq_free, fep, eqep) == fep)
			break;
	}

	atomic_add_64(&eqp->eq_kstat.eqk_cancelled.value.ui64, 1);
}

/*
 * Write elements on the dump list of each nvlist errorq to the dump device.
 * Upon reboot, fmd(1M) will extract and replay them for diagnosis.
 */
void
errorq_dump(void)
{
	errorq_elem_t *eep;
	errorq_t *eqp;

	if (ereport_dumpbuf == NULL)
		return; /* reboot or panic before errorq is even set up */

	for (eqp = errorq_list; eqp != NULL; eqp = eqp->eq_next) {
		if (!(eqp->eq_flags & ERRORQ_NVLIST) ||
		    !(eqp->eq_flags & ERRORQ_ACTIVE))
			continue; /* do not dump this queue on panic */

		for (eep = eqp->eq_dump; eep != NULL; eep = eep->eqe_dump) {
			errorq_nvelem_t *eqnp = eep->eqe_data;
			size_t len = 0;
			erpt_dump_t ed;
			int err;

			(void) nvlist_size(eqnp->eqn_nvl,
			    &len, NV_ENCODE_NATIVE);

			if (len > ereport_dumplen || len == 0) {
				cmn_err(CE_WARN, "%s: unable to save error "
				    "report %p due to size %lu\n",
				    eqp->eq_name, (void *)eep, len);
				continue;
			}

			if ((err = nvlist_pack(eqnp->eqn_nvl,
			    (char **)&ereport_dumpbuf, &ereport_dumplen,
			    NV_ENCODE_NATIVE, KM_NOSLEEP)) != 0) {
				cmn_err(CE_WARN, "%s: unable to save error "
				    "report %p due to pack error %d\n",
				    eqp->eq_name, (void *)eep, err);
				continue;
			}

			ed.ed_magic = ERPT_MAGIC;
			ed.ed_chksum = checksum32(ereport_dumpbuf, len);
			ed.ed_size = (uint32_t)len;
			ed.ed_pad = 0;
			ed.ed_hrt_nsec = 0;
			ed.ed_hrt_base = panic_hrtime;
			ed.ed_tod_base.sec = panic_hrestime.tv_sec;
			ed.ed_tod_base.nsec = panic_hrestime.tv_nsec;

			dumpvp_write(&ed, sizeof (ed));
			dumpvp_write(ereport_dumpbuf, len);
		}
	}
}

nvlist_t *
errorq_elem_nvl(errorq_t *eqp, const errorq_elem_t *eqep)
{
	errorq_nvelem_t *eqnp = eqep->eqe_data;

	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);

	return (eqnp->eqn_nvl);
}

nv_alloc_t *
errorq_elem_nva(errorq_t *eqp, const errorq_elem_t *eqep)
{
	errorq_nvelem_t *eqnp = eqep->eqe_data;

	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE && eqp->eq_flags & ERRORQ_NVLIST);

	return (eqnp->eqn_nva);
}

/*
 * Reserve a new element and duplicate the data of the original into it.
 */
void *
errorq_elem_dup(errorq_t *eqp, const errorq_elem_t *eqep, errorq_elem_t **neqep)
{
	ASSERT(eqp->eq_flags & ERRORQ_ACTIVE);
	ASSERT(!(eqp->eq_flags & ERRORQ_NVLIST));

	if ((*neqep = errorq_reserve(eqp)) == NULL)
		return (NULL);

	bcopy(eqep->eqe_data, (*neqep)->eqe_data, eqp->eq_size);
	return ((*neqep)->eqe_data);
}
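
/*
 * A hypothetical sketch of how a queue's drain callback might use
 * errorq_elem_dup() to retain error data beyond the lifetime of the element
 * it was handed (foo_errorq and foo_defer() are illustrative names only,
 * not part of this file):
 *
 *	static void
 *	foo_drain(void *private, const void *data, const errorq_elem_t *eqep)
 *	{
 *		errorq_elem_t *neqep;
 *		void *copy;
 *
 *		if ((copy = errorq_elem_dup(foo_errorq, eqep, &neqep)) != NULL)
 *			foo_defer(copy, neqep);
 *	}
 *
 * The duplicate occupies a newly reserved element on the same queue, so the
 * deferred code must eventually call errorq_commit() or errorq_cancel() on
 * it to return the element to the free list.
 */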