1 /* 2 * c 2001 PPC 64 Team, IBM Corp 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * /dev/nvram driver for PPC64 10 * 11 * This perhaps should live in drivers/char 12 */ 13 14 15 #include <linux/types.h> 16 #include <linux/errno.h> 17 #include <linux/init.h> 18 #include <linux/spinlock.h> 19 #include <linux/slab.h> 20 #include <linux/kmsg_dump.h> 21 #include <linux/ctype.h> 22 #include <linux/zlib.h> 23 #include <asm/uaccess.h> 24 #include <asm/nvram.h> 25 #include <asm/rtas.h> 26 #include <asm/prom.h> 27 #include <asm/machdep.h> 28 29 /* Max bytes to read/write in one go */ 30 #define NVRW_CNT 0x20 31 32 static unsigned int nvram_size; 33 static int nvram_fetch, nvram_store; 34 static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ 35 static DEFINE_SPINLOCK(nvram_lock); 36 37 struct err_log_info { 38 int error_type; 39 unsigned int seq_num; 40 }; 41 42 struct nvram_os_partition { 43 const char *name; 44 int req_size; /* desired size, in bytes */ 45 int min_size; /* minimum acceptable size (0 means req_size) */ 46 long size; /* size of data portion (excluding err_log_info) */ 47 long index; /* offset of data portion of partition */ 48 }; 49 50 static struct nvram_os_partition rtas_log_partition = { 51 .name = "ibm,rtas-log", 52 .req_size = 2079, 53 .min_size = 1055, 54 .index = -1 55 }; 56 57 static struct nvram_os_partition oops_log_partition = { 58 .name = "lnx,oops-log", 59 .req_size = 4000, 60 .min_size = 2000, 61 .index = -1 62 }; 63 64 static const char *pseries_nvram_os_partitions[] = { 65 "ibm,rtas-log", 66 "lnx,oops-log", 67 NULL 68 }; 69 70 static void oops_to_nvram(struct kmsg_dumper *dumper, 71 enum kmsg_dump_reason reason); 72 73 static struct kmsg_dumper nvram_kmsg_dumper = { 74 .dump = oops_to_nvram 75 }; 76 77 /* See clobbering_unread_rtas_event() */ 78 #define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */ 79 static unsigned long last_unread_rtas_event; /* timestamp */ 80 81 /* 82 * For capturing and compressing an oops or panic report... 83 84 * big_oops_buf[] holds the uncompressed text we're capturing. 85 * 86 * oops_buf[] holds the compressed text, preceded by a prefix. 87 * The prefix is just a u16 holding the length of the compressed* text. 88 * (*Or uncompressed, if compression fails.) oops_buf[] gets written 89 * to NVRAM. 90 * 91 * oops_len points to the prefix. oops_data points to the compressed text. 92 * 93 * +- oops_buf 94 * | +- oops_data 95 * v v 96 * +------------+-----------------------------------------------+ 97 * | length | text | 98 * | (2 bytes) | (oops_data_sz bytes) | 99 * +------------+-----------------------------------------------+ 100 * ^ 101 * +- oops_len 102 * 103 * We preallocate these buffers during init to avoid kmalloc during oops/panic. 104 */ 105 static size_t big_oops_buf_sz; 106 static char *big_oops_buf, *oops_buf; 107 static u16 *oops_len; 108 static char *oops_data; 109 static size_t oops_data_sz; 110 111 /* Compression parameters */ 112 #define COMPR_LEVEL 6 113 #define WINDOW_BITS 12 114 #define MEM_LEVEL 4 115 static struct z_stream_s stream; 116 117 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) 118 { 119 unsigned int i; 120 unsigned long len; 121 int done; 122 unsigned long flags; 123 char *p = buf; 124 125 126 if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE) 127 return -ENODEV; 128 129 if (*index >= nvram_size) 130 return 0; 131 132 i = *index; 133 if (i + count > nvram_size) 134 count = nvram_size - i; 135 136 spin_lock_irqsave(&nvram_lock, flags); 137 138 for (; count != 0; count -= len) { 139 len = count; 140 if (len > NVRW_CNT) 141 len = NVRW_CNT; 142 143 if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), 144 len) != 0) || len != done) { 145 spin_unlock_irqrestore(&nvram_lock, flags); 146 return -EIO; 147 } 148 149 memcpy(p, nvram_buf, len); 150 151 p += len; 152 i += len; 153 } 154 155 spin_unlock_irqrestore(&nvram_lock, flags); 156 157 *index = i; 158 return p - buf; 159 } 160 161 static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) 162 { 163 unsigned int i; 164 unsigned long len; 165 int done; 166 unsigned long flags; 167 const char *p = buf; 168 169 if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE) 170 return -ENODEV; 171 172 if (*index >= nvram_size) 173 return 0; 174 175 i = *index; 176 if (i + count > nvram_size) 177 count = nvram_size - i; 178 179 spin_lock_irqsave(&nvram_lock, flags); 180 181 for (; count != 0; count -= len) { 182 len = count; 183 if (len > NVRW_CNT) 184 len = NVRW_CNT; 185 186 memcpy(nvram_buf, p, len); 187 188 if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), 189 len) != 0) || len != done) { 190 spin_unlock_irqrestore(&nvram_lock, flags); 191 return -EIO; 192 } 193 194 p += len; 195 i += len; 196 } 197 spin_unlock_irqrestore(&nvram_lock, flags); 198 199 *index = i; 200 return p - buf; 201 } 202 203 static ssize_t pSeries_nvram_get_size(void) 204 { 205 return nvram_size ? nvram_size : -ENODEV; 206 } 207 208 209 /* nvram_write_os_partition, nvram_write_error_log 210 * 211 * We need to buffer the error logs into nvram to ensure that we have 212 * the failure information to decode. If we have a severe error there 213 * is no way to guarantee that the OS or the machine is in a state to 214 * get back to user land and write the error to disk. For example if 215 * the SCSI device driver causes a Machine Check by writing to a bad 216 * IO address, there is no way of guaranteeing that the device driver 217 * is in any state that is would also be able to write the error data 218 * captured to disk, thus we buffer it in NVRAM for analysis on the 219 * next boot. 220 * 221 * In NVRAM the partition containing the error log buffer will looks like: 222 * Header (in bytes): 223 * +-----------+----------+--------+------------+------------------+ 224 * | signature | checksum | length | name | data | 225 * |0 |1 |2 3|4 15|16 length-1| 226 * +-----------+----------+--------+------------+------------------+ 227 * 228 * The 'data' section would look like (in bytes): 229 * +--------------+------------+-----------------------------------+ 230 * | event_logged | sequence # | error log | 231 * |0 3|4 7|8 error_log_size-1| 232 * +--------------+------------+-----------------------------------+ 233 * 234 * event_logged: 0 if event has not been logged to syslog, 1 if it has 235 * sequence #: The unique sequence # for each event. (until it wraps) 236 * error log: The error log from event_scan 237 */ 238 int nvram_write_os_partition(struct nvram_os_partition *part, char * buff, 239 int length, unsigned int err_type, unsigned int error_log_cnt) 240 { 241 int rc; 242 loff_t tmp_index; 243 struct err_log_info info; 244 245 if (part->index == -1) { 246 return -ESPIPE; 247 } 248 249 if (length > part->size) { 250 length = part->size; 251 } 252 253 info.error_type = err_type; 254 info.seq_num = error_log_cnt; 255 256 tmp_index = part->index; 257 258 rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); 259 if (rc <= 0) { 260 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc); 261 return rc; 262 } 263 264 rc = ppc_md.nvram_write(buff, length, &tmp_index); 265 if (rc <= 0) { 266 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc); 267 return rc; 268 } 269 270 return 0; 271 } 272 273 int nvram_write_error_log(char * buff, int length, 274 unsigned int err_type, unsigned int error_log_cnt) 275 { 276 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, 277 err_type, error_log_cnt); 278 if (!rc) 279 last_unread_rtas_event = get_seconds(); 280 return rc; 281 } 282 283 /* nvram_read_error_log 284 * 285 * Reads nvram for error log for at most 'length' 286 */ 287 int nvram_read_error_log(char * buff, int length, 288 unsigned int * err_type, unsigned int * error_log_cnt) 289 { 290 int rc; 291 loff_t tmp_index; 292 struct err_log_info info; 293 294 if (rtas_log_partition.index == -1) 295 return -1; 296 297 if (length > rtas_log_partition.size) 298 length = rtas_log_partition.size; 299 300 tmp_index = rtas_log_partition.index; 301 302 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); 303 if (rc <= 0) { 304 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 305 return rc; 306 } 307 308 rc = ppc_md.nvram_read(buff, length, &tmp_index); 309 if (rc <= 0) { 310 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 311 return rc; 312 } 313 314 *error_log_cnt = info.seq_num; 315 *err_type = info.error_type; 316 317 return 0; 318 } 319 320 /* This doesn't actually zero anything, but it sets the event_logged 321 * word to tell that this event is safely in syslog. 322 */ 323 int nvram_clear_error_log(void) 324 { 325 loff_t tmp_index; 326 int clear_word = ERR_FLAG_ALREADY_LOGGED; 327 int rc; 328 329 if (rtas_log_partition.index == -1) 330 return -1; 331 332 tmp_index = rtas_log_partition.index; 333 334 rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); 335 if (rc <= 0) { 336 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); 337 return rc; 338 } 339 last_unread_rtas_event = 0; 340 341 return 0; 342 } 343 344 /* pseries_nvram_init_os_partition 345 * 346 * This sets up a partition with an "OS" signature. 347 * 348 * The general strategy is the following: 349 * 1.) If a partition with the indicated name already exists... 350 * - If it's large enough, use it. 351 * - Otherwise, recycle it and keep going. 352 * 2.) Search for a free partition that is large enough. 353 * 3.) If there's not a free partition large enough, recycle any obsolete 354 * OS partitions and try again. 355 * 4.) Will first try getting a chunk that will satisfy the requested size. 356 * 5.) If a chunk of the requested size cannot be allocated, then try finding 357 * a chunk that will satisfy the minum needed. 358 * 359 * Returns 0 on success, else -1. 360 */ 361 static int __init pseries_nvram_init_os_partition(struct nvram_os_partition 362 *part) 363 { 364 loff_t p; 365 int size; 366 367 /* Scan nvram for partitions */ 368 nvram_scan_partitions(); 369 370 /* Look for ours */ 371 p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size); 372 373 /* Found one but too small, remove it */ 374 if (p && size < part->min_size) { 375 pr_info("nvram: Found too small %s partition," 376 " removing it...\n", part->name); 377 nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL); 378 p = 0; 379 } 380 381 /* Create one if we didn't find */ 382 if (!p) { 383 p = nvram_create_partition(part->name, NVRAM_SIG_OS, 384 part->req_size, part->min_size); 385 if (p == -ENOSPC) { 386 pr_info("nvram: No room to create %s partition, " 387 "deleting any obsolete OS partitions...\n", 388 part->name); 389 nvram_remove_partition(NULL, NVRAM_SIG_OS, 390 pseries_nvram_os_partitions); 391 p = nvram_create_partition(part->name, NVRAM_SIG_OS, 392 part->req_size, part->min_size); 393 } 394 } 395 396 if (p <= 0) { 397 pr_err("nvram: Failed to find or create %s" 398 " partition, err %d\n", part->name, (int)p); 399 return -1; 400 } 401 402 part->index = p; 403 part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info); 404 405 return 0; 406 } 407 408 static void __init nvram_init_oops_partition(int rtas_partition_exists) 409 { 410 int rc; 411 412 rc = pseries_nvram_init_os_partition(&oops_log_partition); 413 if (rc != 0) { 414 if (!rtas_partition_exists) 415 return; 416 pr_notice("nvram: Using %s partition to log both" 417 " RTAS errors and oops/panic reports\n", 418 rtas_log_partition.name); 419 memcpy(&oops_log_partition, &rtas_log_partition, 420 sizeof(rtas_log_partition)); 421 } 422 oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL); 423 if (!oops_buf) { 424 pr_err("nvram: No memory for %s partition\n", 425 oops_log_partition.name); 426 return; 427 } 428 oops_len = (u16*) oops_buf; 429 oops_data = oops_buf + sizeof(u16); 430 oops_data_sz = oops_log_partition.size - sizeof(u16); 431 432 /* 433 * Figure compression (preceded by elimination of each line's <n> 434 * severity prefix) will reduce the oops/panic report to at most 435 * 45% of its original size. 436 */ 437 big_oops_buf_sz = (oops_data_sz * 100) / 45; 438 big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); 439 if (big_oops_buf) { 440 stream.workspace = kmalloc(zlib_deflate_workspacesize( 441 WINDOW_BITS, MEM_LEVEL), GFP_KERNEL); 442 if (!stream.workspace) { 443 pr_err("nvram: No memory for compression workspace; " 444 "skipping compression of %s partition data\n", 445 oops_log_partition.name); 446 kfree(big_oops_buf); 447 big_oops_buf = NULL; 448 } 449 } else { 450 pr_err("No memory for uncompressed %s data; " 451 "skipping compression\n", oops_log_partition.name); 452 stream.workspace = NULL; 453 } 454 455 rc = kmsg_dump_register(&nvram_kmsg_dumper); 456 if (rc != 0) { 457 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); 458 kfree(oops_buf); 459 kfree(big_oops_buf); 460 kfree(stream.workspace); 461 } 462 } 463 464 static int __init pseries_nvram_init_log_partitions(void) 465 { 466 int rc; 467 468 rc = pseries_nvram_init_os_partition(&rtas_log_partition); 469 nvram_init_oops_partition(rc == 0); 470 return 0; 471 } 472 machine_arch_initcall(pseries, pseries_nvram_init_log_partitions); 473 474 int __init pSeries_nvram_init(void) 475 { 476 struct device_node *nvram; 477 const unsigned int *nbytes_p; 478 unsigned int proplen; 479 480 nvram = of_find_node_by_type(NULL, "nvram"); 481 if (nvram == NULL) 482 return -ENODEV; 483 484 nbytes_p = of_get_property(nvram, "#bytes", &proplen); 485 if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { 486 of_node_put(nvram); 487 return -EIO; 488 } 489 490 nvram_size = *nbytes_p; 491 492 nvram_fetch = rtas_token("nvram-fetch"); 493 nvram_store = rtas_token("nvram-store"); 494 printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); 495 of_node_put(nvram); 496 497 ppc_md.nvram_read = pSeries_nvram_read; 498 ppc_md.nvram_write = pSeries_nvram_write; 499 ppc_md.nvram_size = pSeries_nvram_get_size; 500 501 return 0; 502 } 503 504 /* 505 * Are we using the ibm,rtas-log for oops/panic reports? And if so, 506 * would logging this oops/panic overwrite an RTAS event that rtas_errd 507 * hasn't had a chance to read and process? Return 1 if so, else 0. 508 * 509 * We assume that if rtas_errd hasn't read the RTAS event in 510 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. 511 */ 512 static int clobbering_unread_rtas_event(void) 513 { 514 return (oops_log_partition.index == rtas_log_partition.index 515 && last_unread_rtas_event 516 && get_seconds() - last_unread_rtas_event <= 517 NVRAM_RTAS_READ_TIMEOUT); 518 } 519 520 /* Derived from logfs_compress() */ 521 static int nvram_compress(const void *in, void *out, size_t inlen, 522 size_t outlen) 523 { 524 int err, ret; 525 526 ret = -EIO; 527 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, 528 MEM_LEVEL, Z_DEFAULT_STRATEGY); 529 if (err != Z_OK) 530 goto error; 531 532 stream.next_in = in; 533 stream.avail_in = inlen; 534 stream.total_in = 0; 535 stream.next_out = out; 536 stream.avail_out = outlen; 537 stream.total_out = 0; 538 539 err = zlib_deflate(&stream, Z_FINISH); 540 if (err != Z_STREAM_END) 541 goto error; 542 543 err = zlib_deflateEnd(&stream); 544 if (err != Z_OK) 545 goto error; 546 547 if (stream.total_out >= stream.total_in) 548 goto error; 549 550 ret = stream.total_out; 551 error: 552 return ret; 553 } 554 555 /* Compress the text from big_oops_buf into oops_buf. */ 556 static int zip_oops(size_t text_len) 557 { 558 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, 559 oops_data_sz); 560 if (zipped_len < 0) { 561 pr_err("nvram: compression failed; returned %d\n", zipped_len); 562 pr_err("nvram: logging uncompressed oops/panic report\n"); 563 return -1; 564 } 565 *oops_len = (u16) zipped_len; 566 return 0; 567 } 568 569 /* 570 * This is our kmsg_dump callback, called after an oops or panic report 571 * has been written to the printk buffer. We want to capture as much 572 * of the printk buffer as possible. First, capture as much as we can 573 * that we think will compress sufficiently to fit in the lnx,oops-log 574 * partition. If that's too much, go back and capture uncompressed text. 575 */ 576 static void oops_to_nvram(struct kmsg_dumper *dumper, 577 enum kmsg_dump_reason reason) 578 { 579 static unsigned int oops_count = 0; 580 static bool panicking = false; 581 static DEFINE_SPINLOCK(lock); 582 unsigned long flags; 583 size_t text_len; 584 unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ; 585 int rc = -1; 586 587 switch (reason) { 588 case KMSG_DUMP_RESTART: 589 case KMSG_DUMP_HALT: 590 case KMSG_DUMP_POWEROFF: 591 /* These are almost always orderly shutdowns. */ 592 return; 593 case KMSG_DUMP_OOPS: 594 break; 595 case KMSG_DUMP_PANIC: 596 panicking = true; 597 break; 598 case KMSG_DUMP_EMERG: 599 if (panicking) 600 /* Panic report already captured. */ 601 return; 602 break; 603 default: 604 pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", 605 __FUNCTION__, (int) reason); 606 return; 607 } 608 609 if (clobbering_unread_rtas_event()) 610 return; 611 612 if (!spin_trylock_irqsave(&lock, flags)) 613 return; 614 615 if (big_oops_buf) { 616 kmsg_dump_get_buffer(dumper, false, 617 big_oops_buf, big_oops_buf_sz, &text_len); 618 rc = zip_oops(text_len); 619 } 620 if (rc != 0) { 621 kmsg_dump_rewind(dumper); 622 kmsg_dump_get_buffer(dumper, true, 623 oops_data, oops_data_sz, &text_len); 624 err_type = ERR_TYPE_KERNEL_PANIC; 625 *oops_len = (u16) text_len; 626 } 627 628 (void) nvram_write_os_partition(&oops_log_partition, oops_buf, 629 (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); 630 631 spin_unlock_irqrestore(&lock, flags); 632 } 633