1 /* 2 * c 2001 PPC 64 Team, IBM Corp 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * /dev/nvram driver for PPC64 10 * 11 * This perhaps should live in drivers/char 12 */ 13 14 15 #include <linux/types.h> 16 #include <linux/errno.h> 17 #include <linux/init.h> 18 #include <linux/spinlock.h> 19 #include <linux/slab.h> 20 #include <linux/kmsg_dump.h> 21 #include <linux/ctype.h> 22 #include <linux/zlib.h> 23 #include <asm/uaccess.h> 24 #include <asm/nvram.h> 25 #include <asm/rtas.h> 26 #include <asm/prom.h> 27 #include <asm/machdep.h> 28 29 /* Max bytes to read/write in one go */ 30 #define NVRW_CNT 0x20 31 32 static unsigned int nvram_size; 33 static int nvram_fetch, nvram_store; 34 static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ 35 static DEFINE_SPINLOCK(nvram_lock); 36 37 struct err_log_info { 38 int error_type; 39 unsigned int seq_num; 40 }; 41 42 struct nvram_os_partition { 43 const char *name; 44 int req_size; /* desired size, in bytes */ 45 int min_size; /* minimum acceptable size (0 means req_size) */ 46 long size; /* size of data portion (excluding err_log_info) */ 47 long index; /* offset of data portion of partition */ 48 }; 49 50 static struct nvram_os_partition rtas_log_partition = { 51 .name = "ibm,rtas-log", 52 .req_size = 2079, 53 .min_size = 1055, 54 .index = -1 55 }; 56 57 static struct nvram_os_partition oops_log_partition = { 58 .name = "lnx,oops-log", 59 .req_size = 4000, 60 .min_size = 2000, 61 .index = -1 62 }; 63 64 static const char *pseries_nvram_os_partitions[] = { 65 "ibm,rtas-log", 66 "lnx,oops-log", 67 NULL 68 }; 69 70 static void oops_to_nvram(struct kmsg_dumper *dumper, 71 enum kmsg_dump_reason reason, 72 const char *old_msgs, unsigned long old_len, 73 const char *new_msgs, unsigned long new_len); 74 75 static struct kmsg_dumper nvram_kmsg_dumper = { 76 .dump = oops_to_nvram 77 }; 78 79 /* See clobbering_unread_rtas_event() */ 80 #define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */ 81 static unsigned long last_unread_rtas_event; /* timestamp */ 82 83 /* 84 * For capturing and compressing an oops or panic report... 85 86 * big_oops_buf[] holds the uncompressed text we're capturing. 87 * 88 * oops_buf[] holds the compressed text, preceded by a prefix. 89 * The prefix is just a u16 holding the length of the compressed* text. 90 * (*Or uncompressed, if compression fails.) oops_buf[] gets written 91 * to NVRAM. 92 * 93 * oops_len points to the prefix. oops_data points to the compressed text. 94 * 95 * +- oops_buf 96 * | +- oops_data 97 * v v 98 * +------------+-----------------------------------------------+ 99 * | length | text | 100 * | (2 bytes) | (oops_data_sz bytes) | 101 * +------------+-----------------------------------------------+ 102 * ^ 103 * +- oops_len 104 * 105 * We preallocate these buffers during init to avoid kmalloc during oops/panic. 106 */ 107 static size_t big_oops_buf_sz; 108 static char *big_oops_buf, *oops_buf; 109 static u16 *oops_len; 110 static char *oops_data; 111 static size_t oops_data_sz; 112 113 /* Compression parameters */ 114 #define COMPR_LEVEL 6 115 #define WINDOW_BITS 12 116 #define MEM_LEVEL 4 117 static struct z_stream_s stream; 118 119 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) 120 { 121 unsigned int i; 122 unsigned long len; 123 int done; 124 unsigned long flags; 125 char *p = buf; 126 127 128 if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE) 129 return -ENODEV; 130 131 if (*index >= nvram_size) 132 return 0; 133 134 i = *index; 135 if (i + count > nvram_size) 136 count = nvram_size - i; 137 138 spin_lock_irqsave(&nvram_lock, flags); 139 140 for (; count != 0; count -= len) { 141 len = count; 142 if (len > NVRW_CNT) 143 len = NVRW_CNT; 144 145 if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), 146 len) != 0) || len != done) { 147 spin_unlock_irqrestore(&nvram_lock, flags); 148 return -EIO; 149 } 150 151 memcpy(p, nvram_buf, len); 152 153 p += len; 154 i += len; 155 } 156 157 spin_unlock_irqrestore(&nvram_lock, flags); 158 159 *index = i; 160 return p - buf; 161 } 162 163 static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) 164 { 165 unsigned int i; 166 unsigned long len; 167 int done; 168 unsigned long flags; 169 const char *p = buf; 170 171 if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE) 172 return -ENODEV; 173 174 if (*index >= nvram_size) 175 return 0; 176 177 i = *index; 178 if (i + count > nvram_size) 179 count = nvram_size - i; 180 181 spin_lock_irqsave(&nvram_lock, flags); 182 183 for (; count != 0; count -= len) { 184 len = count; 185 if (len > NVRW_CNT) 186 len = NVRW_CNT; 187 188 memcpy(nvram_buf, p, len); 189 190 if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), 191 len) != 0) || len != done) { 192 spin_unlock_irqrestore(&nvram_lock, flags); 193 return -EIO; 194 } 195 196 p += len; 197 i += len; 198 } 199 spin_unlock_irqrestore(&nvram_lock, flags); 200 201 *index = i; 202 return p - buf; 203 } 204 205 static ssize_t pSeries_nvram_get_size(void) 206 { 207 return nvram_size ? nvram_size : -ENODEV; 208 } 209 210 211 /* nvram_write_os_partition, nvram_write_error_log 212 * 213 * We need to buffer the error logs into nvram to ensure that we have 214 * the failure information to decode. If we have a severe error there 215 * is no way to guarantee that the OS or the machine is in a state to 216 * get back to user land and write the error to disk. For example if 217 * the SCSI device driver causes a Machine Check by writing to a bad 218 * IO address, there is no way of guaranteeing that the device driver 219 * is in any state that is would also be able to write the error data 220 * captured to disk, thus we buffer it in NVRAM for analysis on the 221 * next boot. 222 * 223 * In NVRAM the partition containing the error log buffer will looks like: 224 * Header (in bytes): 225 * +-----------+----------+--------+------------+------------------+ 226 * | signature | checksum | length | name | data | 227 * |0 |1 |2 3|4 15|16 length-1| 228 * +-----------+----------+--------+------------+------------------+ 229 * 230 * The 'data' section would look like (in bytes): 231 * +--------------+------------+-----------------------------------+ 232 * | event_logged | sequence # | error log | 233 * |0 3|4 7|8 error_log_size-1| 234 * +--------------+------------+-----------------------------------+ 235 * 236 * event_logged: 0 if event has not been logged to syslog, 1 if it has 237 * sequence #: The unique sequence # for each event. (until it wraps) 238 * error log: The error log from event_scan 239 */ 240 int nvram_write_os_partition(struct nvram_os_partition *part, char * buff, 241 int length, unsigned int err_type, unsigned int error_log_cnt) 242 { 243 int rc; 244 loff_t tmp_index; 245 struct err_log_info info; 246 247 if (part->index == -1) { 248 return -ESPIPE; 249 } 250 251 if (length > part->size) { 252 length = part->size; 253 } 254 255 info.error_type = err_type; 256 info.seq_num = error_log_cnt; 257 258 tmp_index = part->index; 259 260 rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); 261 if (rc <= 0) { 262 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc); 263 return rc; 264 } 265 266 rc = ppc_md.nvram_write(buff, length, &tmp_index); 267 if (rc <= 0) { 268 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc); 269 return rc; 270 } 271 272 return 0; 273 } 274 275 int nvram_write_error_log(char * buff, int length, 276 unsigned int err_type, unsigned int error_log_cnt) 277 { 278 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, 279 err_type, error_log_cnt); 280 if (!rc) 281 last_unread_rtas_event = get_seconds(); 282 return rc; 283 } 284 285 /* nvram_read_error_log 286 * 287 * Reads nvram for error log for at most 'length' 288 */ 289 int nvram_read_error_log(char * buff, int length, 290 unsigned int * err_type, unsigned int * error_log_cnt) 291 { 292 int rc; 293 loff_t tmp_index; 294 struct err_log_info info; 295 296 if (rtas_log_partition.index == -1) 297 return -1; 298 299 if (length > rtas_log_partition.size) 300 length = rtas_log_partition.size; 301 302 tmp_index = rtas_log_partition.index; 303 304 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); 305 if (rc <= 0) { 306 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 307 return rc; 308 } 309 310 rc = ppc_md.nvram_read(buff, length, &tmp_index); 311 if (rc <= 0) { 312 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 313 return rc; 314 } 315 316 *error_log_cnt = info.seq_num; 317 *err_type = info.error_type; 318 319 return 0; 320 } 321 322 /* This doesn't actually zero anything, but it sets the event_logged 323 * word to tell that this event is safely in syslog. 324 */ 325 int nvram_clear_error_log(void) 326 { 327 loff_t tmp_index; 328 int clear_word = ERR_FLAG_ALREADY_LOGGED; 329 int rc; 330 331 if (rtas_log_partition.index == -1) 332 return -1; 333 334 tmp_index = rtas_log_partition.index; 335 336 rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); 337 if (rc <= 0) { 338 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); 339 return rc; 340 } 341 last_unread_rtas_event = 0; 342 343 return 0; 344 } 345 346 /* pseries_nvram_init_os_partition 347 * 348 * This sets up a partition with an "OS" signature. 349 * 350 * The general strategy is the following: 351 * 1.) If a partition with the indicated name already exists... 352 * - If it's large enough, use it. 353 * - Otherwise, recycle it and keep going. 354 * 2.) Search for a free partition that is large enough. 355 * 3.) If there's not a free partition large enough, recycle any obsolete 356 * OS partitions and try again. 357 * 4.) Will first try getting a chunk that will satisfy the requested size. 358 * 5.) If a chunk of the requested size cannot be allocated, then try finding 359 * a chunk that will satisfy the minum needed. 360 * 361 * Returns 0 on success, else -1. 362 */ 363 static int __init pseries_nvram_init_os_partition(struct nvram_os_partition 364 *part) 365 { 366 loff_t p; 367 int size; 368 369 /* Scan nvram for partitions */ 370 nvram_scan_partitions(); 371 372 /* Look for ours */ 373 p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size); 374 375 /* Found one but too small, remove it */ 376 if (p && size < part->min_size) { 377 pr_info("nvram: Found too small %s partition," 378 " removing it...\n", part->name); 379 nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL); 380 p = 0; 381 } 382 383 /* Create one if we didn't find */ 384 if (!p) { 385 p = nvram_create_partition(part->name, NVRAM_SIG_OS, 386 part->req_size, part->min_size); 387 if (p == -ENOSPC) { 388 pr_info("nvram: No room to create %s partition, " 389 "deleting any obsolete OS partitions...\n", 390 part->name); 391 nvram_remove_partition(NULL, NVRAM_SIG_OS, 392 pseries_nvram_os_partitions); 393 p = nvram_create_partition(part->name, NVRAM_SIG_OS, 394 part->req_size, part->min_size); 395 } 396 } 397 398 if (p <= 0) { 399 pr_err("nvram: Failed to find or create %s" 400 " partition, err %d\n", part->name, (int)p); 401 return -1; 402 } 403 404 part->index = p; 405 part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info); 406 407 return 0; 408 } 409 410 static void __init nvram_init_oops_partition(int rtas_partition_exists) 411 { 412 int rc; 413 414 rc = pseries_nvram_init_os_partition(&oops_log_partition); 415 if (rc != 0) { 416 if (!rtas_partition_exists) 417 return; 418 pr_notice("nvram: Using %s partition to log both" 419 " RTAS errors and oops/panic reports\n", 420 rtas_log_partition.name); 421 memcpy(&oops_log_partition, &rtas_log_partition, 422 sizeof(rtas_log_partition)); 423 } 424 oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL); 425 if (!oops_buf) { 426 pr_err("nvram: No memory for %s partition\n", 427 oops_log_partition.name); 428 return; 429 } 430 oops_len = (u16*) oops_buf; 431 oops_data = oops_buf + sizeof(u16); 432 oops_data_sz = oops_log_partition.size - sizeof(u16); 433 434 /* 435 * Figure compression (preceded by elimination of each line's <n> 436 * severity prefix) will reduce the oops/panic report to at most 437 * 45% of its original size. 438 */ 439 big_oops_buf_sz = (oops_data_sz * 100) / 45; 440 big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); 441 if (big_oops_buf) { 442 stream.workspace = kmalloc(zlib_deflate_workspacesize( 443 WINDOW_BITS, MEM_LEVEL), GFP_KERNEL); 444 if (!stream.workspace) { 445 pr_err("nvram: No memory for compression workspace; " 446 "skipping compression of %s partition data\n", 447 oops_log_partition.name); 448 kfree(big_oops_buf); 449 big_oops_buf = NULL; 450 } 451 } else { 452 pr_err("No memory for uncompressed %s data; " 453 "skipping compression\n", oops_log_partition.name); 454 stream.workspace = NULL; 455 } 456 457 rc = kmsg_dump_register(&nvram_kmsg_dumper); 458 if (rc != 0) { 459 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); 460 kfree(oops_buf); 461 kfree(big_oops_buf); 462 kfree(stream.workspace); 463 } 464 } 465 466 static int __init pseries_nvram_init_log_partitions(void) 467 { 468 int rc; 469 470 rc = pseries_nvram_init_os_partition(&rtas_log_partition); 471 nvram_init_oops_partition(rc == 0); 472 return 0; 473 } 474 machine_arch_initcall(pseries, pseries_nvram_init_log_partitions); 475 476 int __init pSeries_nvram_init(void) 477 { 478 struct device_node *nvram; 479 const unsigned int *nbytes_p; 480 unsigned int proplen; 481 482 nvram = of_find_node_by_type(NULL, "nvram"); 483 if (nvram == NULL) 484 return -ENODEV; 485 486 nbytes_p = of_get_property(nvram, "#bytes", &proplen); 487 if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { 488 of_node_put(nvram); 489 return -EIO; 490 } 491 492 nvram_size = *nbytes_p; 493 494 nvram_fetch = rtas_token("nvram-fetch"); 495 nvram_store = rtas_token("nvram-store"); 496 printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); 497 of_node_put(nvram); 498 499 ppc_md.nvram_read = pSeries_nvram_read; 500 ppc_md.nvram_write = pSeries_nvram_write; 501 ppc_md.nvram_size = pSeries_nvram_get_size; 502 503 return 0; 504 } 505 506 /* 507 * Try to capture the last capture_len bytes of the printk buffer. Return 508 * the amount actually captured. 509 */ 510 static size_t capture_last_msgs(const char *old_msgs, size_t old_len, 511 const char *new_msgs, size_t new_len, 512 char *captured, size_t capture_len) 513 { 514 if (new_len >= capture_len) { 515 memcpy(captured, new_msgs + (new_len - capture_len), 516 capture_len); 517 return capture_len; 518 } else { 519 /* Grab the end of old_msgs. */ 520 size_t old_tail_len = min(old_len, capture_len - new_len); 521 memcpy(captured, old_msgs + (old_len - old_tail_len), 522 old_tail_len); 523 memcpy(captured + old_tail_len, new_msgs, new_len); 524 return old_tail_len + new_len; 525 } 526 } 527 528 /* 529 * Are we using the ibm,rtas-log for oops/panic reports? And if so, 530 * would logging this oops/panic overwrite an RTAS event that rtas_errd 531 * hasn't had a chance to read and process? Return 1 if so, else 0. 532 * 533 * We assume that if rtas_errd hasn't read the RTAS event in 534 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. 535 */ 536 static int clobbering_unread_rtas_event(void) 537 { 538 return (oops_log_partition.index == rtas_log_partition.index 539 && last_unread_rtas_event 540 && get_seconds() - last_unread_rtas_event <= 541 NVRAM_RTAS_READ_TIMEOUT); 542 } 543 544 /* Squeeze out each line's <n> severity prefix. */ 545 static size_t elide_severities(char *buf, size_t len) 546 { 547 char *in, *out, *buf_end = buf + len; 548 /* Assume a <n> at the very beginning marks the start of a line. */ 549 int newline = 1; 550 551 in = out = buf; 552 while (in < buf_end) { 553 if (newline && in+3 <= buf_end && 554 *in == '<' && isdigit(in[1]) && in[2] == '>') { 555 in += 3; 556 newline = 0; 557 } else { 558 newline = (*in == '\n'); 559 *out++ = *in++; 560 } 561 } 562 return out - buf; 563 } 564 565 /* Derived from logfs_compress() */ 566 static int nvram_compress(const void *in, void *out, size_t inlen, 567 size_t outlen) 568 { 569 int err, ret; 570 571 ret = -EIO; 572 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, 573 MEM_LEVEL, Z_DEFAULT_STRATEGY); 574 if (err != Z_OK) 575 goto error; 576 577 stream.next_in = in; 578 stream.avail_in = inlen; 579 stream.total_in = 0; 580 stream.next_out = out; 581 stream.avail_out = outlen; 582 stream.total_out = 0; 583 584 err = zlib_deflate(&stream, Z_FINISH); 585 if (err != Z_STREAM_END) 586 goto error; 587 588 err = zlib_deflateEnd(&stream); 589 if (err != Z_OK) 590 goto error; 591 592 if (stream.total_out >= stream.total_in) 593 goto error; 594 595 ret = stream.total_out; 596 error: 597 return ret; 598 } 599 600 /* Compress the text from big_oops_buf into oops_buf. */ 601 static int zip_oops(size_t text_len) 602 { 603 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, 604 oops_data_sz); 605 if (zipped_len < 0) { 606 pr_err("nvram: compression failed; returned %d\n", zipped_len); 607 pr_err("nvram: logging uncompressed oops/panic report\n"); 608 return -1; 609 } 610 *oops_len = (u16) zipped_len; 611 return 0; 612 } 613 614 /* 615 * This is our kmsg_dump callback, called after an oops or panic report 616 * has been written to the printk buffer. We want to capture as much 617 * of the printk buffer as possible. First, capture as much as we can 618 * that we think will compress sufficiently to fit in the lnx,oops-log 619 * partition. If that's too much, go back and capture uncompressed text. 620 */ 621 static void oops_to_nvram(struct kmsg_dumper *dumper, 622 enum kmsg_dump_reason reason, 623 const char *old_msgs, unsigned long old_len, 624 const char *new_msgs, unsigned long new_len) 625 { 626 static unsigned int oops_count = 0; 627 static bool panicking = false; 628 size_t text_len; 629 unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ; 630 int rc = -1; 631 632 switch (reason) { 633 case KMSG_DUMP_RESTART: 634 case KMSG_DUMP_HALT: 635 case KMSG_DUMP_POWEROFF: 636 /* These are almost always orderly shutdowns. */ 637 return; 638 case KMSG_DUMP_OOPS: 639 case KMSG_DUMP_KEXEC: 640 break; 641 case KMSG_DUMP_PANIC: 642 panicking = true; 643 break; 644 case KMSG_DUMP_EMERG: 645 if (panicking) 646 /* Panic report already captured. */ 647 return; 648 break; 649 default: 650 pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", 651 __FUNCTION__, (int) reason); 652 return; 653 } 654 655 if (clobbering_unread_rtas_event()) 656 return; 657 658 if (big_oops_buf) { 659 text_len = capture_last_msgs(old_msgs, old_len, 660 new_msgs, new_len, big_oops_buf, big_oops_buf_sz); 661 text_len = elide_severities(big_oops_buf, text_len); 662 rc = zip_oops(text_len); 663 } 664 if (rc != 0) { 665 text_len = capture_last_msgs(old_msgs, old_len, 666 new_msgs, new_len, oops_data, oops_data_sz); 667 err_type = ERR_TYPE_KERNEL_PANIC; 668 *oops_len = (u16) text_len; 669 } 670 671 (void) nvram_write_os_partition(&oops_log_partition, oops_buf, 672 (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); 673 } 674