// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/kernel/printk.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 * Modified to make sys_syslog() more flexible: added commands to
 * return the last 4k of kernel messages, regardless of whether
 * they've been read or not.  Added option to suppress kernel printk's
 * to the console.  Added hook for sending the console messages
 * elsewhere, in preparation for a serial line console (someday).
 * Ted Ts'o, 2/11/93.
 * Modified for sysctl support, 1/8/97, Chris Horn.
 * Fixed SMP synchronization, 08/08/99, Manfred Spraul
 *     manfred@colorfullife.com
 * Rewrote bits to get rid of console_lock
 *	01Mar01 Andrew Morton
 */

/* Every pr_*() message emitted from this file is prefixed with "<modname>: ". */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/console.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/security.h>
#include <linux/memblock.h>
#include <linux/syscalls.h>
#include <linux/crash_core.h>
#include <linux/ratelimit.h>
#include <linux/kmsg_dump.h>
#include <linux/syslog.h>
#include <linux/cpu.h>
#include <linux/rculist.h>
#include <linux/poll.h>
#include <linux/irq_work.h>
#include <linux/ctype.h>
#include <linux/uio.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>

#include <linux/uaccess.h>
#include <asm/sections.h>

#include <trace/events/initcall.h>
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>

#include "printk_ringbuffer.h"
#include "console_cmdline.h"
#include "braille.h"
#include "internal.h"

/*
 * The four console log level settings. The per-element comments name the
 * setting stored in each slot.
 */
int console_printk[4] = {
	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
	MESSAGE_LOGLEVEL_DEFAULT,	/* default_message_loglevel */
	CONSOLE_LOGLEVEL_MIN,		/* minimum_console_loglevel */
	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
};
EXPORT_SYMBOL_GPL(console_printk);

/*
 * NOTE(review): presumably a counter that lets callers silence warnings
 * about the console lock; its readers are not visible in this part of the
 * file - confirm before relying on this description.
 */
atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
EXPORT_SYMBOL(ignore_console_lock_warning);

/*
 * Low level drivers may need that to know if they can schedule in
 * their unblank() callback or not. So let's export it.
 */
int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);

/*
 * console_sem protects the console_drivers list, and also
 * provides serialisation for access to the entire console
 * driver system.
 */
static DEFINE_SEMAPHORE(console_sem);
struct console *console_drivers;
EXPORT_SYMBOL_GPL(console_drivers);

/*
 * System may need to suppress printk message under certain
 * circumstances, like after kernel panic happens.
 */
int __read_mostly suppress_printk;

#ifdef CONFIG_LOCKDEP
/*
 * Lockdep map passed to mutex_acquire()/mutex_release() by the console_sem
 * helpers, so that console_sem is tracked under the name "console_lock".
 */
static struct lockdep_map console_lock_dep_map = {
	.name = "console_lock"
};
#endif

/* Bit positions used to build the DEVKMSG_LOG_MASK_* values below. */
enum devkmsg_log_bits {
	__DEVKMSG_LOG_BIT_ON = 0,
	__DEVKMSG_LOG_BIT_OFF,
	__DEVKMSG_LOG_BIT_LOCK,
};

/*
 * Policy masks stored in devkmsg_log: ON and OFF are selected by the "on"
 * and "off" keywords, LOCK is set by the printk.devkmsg= command line
 * handler and makes the sysctl handler reject further writes.
 */
enum devkmsg_log_masks {
	DEVKMSG_LOG_MASK_ON             = BIT(__DEVKMSG_LOG_BIT_ON),
	DEVKMSG_LOG_MASK_OFF            = BIT(__DEVKMSG_LOG_BIT_OFF),
	DEVKMSG_LOG_MASK_LOCK           = BIT(__DEVKMSG_LOG_BIT_LOCK),
};

/* Keep both the 'on' and 'off' bits clear, i.e.
ratelimit by default: */ 115 #define DEVKMSG_LOG_MASK_DEFAULT 0 116 117 static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; 118 119 static int __control_devkmsg(char *str) 120 { 121 size_t len; 122 123 if (!str) 124 return -EINVAL; 125 126 len = str_has_prefix(str, "on"); 127 if (len) { 128 devkmsg_log = DEVKMSG_LOG_MASK_ON; 129 return len; 130 } 131 132 len = str_has_prefix(str, "off"); 133 if (len) { 134 devkmsg_log = DEVKMSG_LOG_MASK_OFF; 135 return len; 136 } 137 138 len = str_has_prefix(str, "ratelimit"); 139 if (len) { 140 devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; 141 return len; 142 } 143 144 return -EINVAL; 145 } 146 147 static int __init control_devkmsg(char *str) 148 { 149 if (__control_devkmsg(str) < 0) 150 return 1; 151 152 /* 153 * Set sysctl string accordingly: 154 */ 155 if (devkmsg_log == DEVKMSG_LOG_MASK_ON) 156 strcpy(devkmsg_log_str, "on"); 157 else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) 158 strcpy(devkmsg_log_str, "off"); 159 /* else "ratelimit" which is set by default. */ 160 161 /* 162 * Sysctl cannot change it anymore. The kernel command line setting of 163 * this parameter is to force the setting to be permanent throughout the 164 * runtime of the system. This is a precation measure against userspace 165 * trying to be a smarta** and attempting to change it up on us. 
166 */ 167 devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; 168 169 return 0; 170 } 171 __setup("printk.devkmsg=", control_devkmsg); 172 173 char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; 174 175 int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, 176 void *buffer, size_t *lenp, loff_t *ppos) 177 { 178 char old_str[DEVKMSG_STR_MAX_SIZE]; 179 unsigned int old; 180 int err; 181 182 if (write) { 183 if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK) 184 return -EINVAL; 185 186 old = devkmsg_log; 187 strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE); 188 } 189 190 err = proc_dostring(table, write, buffer, lenp, ppos); 191 if (err) 192 return err; 193 194 if (write) { 195 err = __control_devkmsg(devkmsg_log_str); 196 197 /* 198 * Do not accept an unknown string OR a known string with 199 * trailing crap... 200 */ 201 if (err < 0 || (err + 1 != *lenp)) { 202 203 /* ... and restore old setting. */ 204 devkmsg_log = old; 205 strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE); 206 207 return -EINVAL; 208 } 209 } 210 211 return 0; 212 } 213 214 /* Number of registered extended console drivers. */ 215 static int nr_ext_console_drivers; 216 217 /* 218 * Helper macros to handle lockdep when locking/unlocking console_sem. We use 219 * macros instead of functions so that _RET_IP_ contains useful information. 220 */ 221 #define down_console_sem() do { \ 222 down(&console_sem);\ 223 mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\ 224 } while (0) 225 226 static int __down_trylock_console_sem(unsigned long ip) 227 { 228 int lock_failed; 229 unsigned long flags; 230 231 /* 232 * Here and in __up_console_sem() we need to be in safe mode, 233 * because spindump/WARN/etc from under console ->lock will 234 * deadlock in printk()->down_trylock_console_sem() otherwise. 
235 */ 236 printk_safe_enter_irqsave(flags); 237 lock_failed = down_trylock(&console_sem); 238 printk_safe_exit_irqrestore(flags); 239 240 if (lock_failed) 241 return 1; 242 mutex_acquire(&console_lock_dep_map, 0, 1, ip); 243 return 0; 244 } 245 #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) 246 247 static void __up_console_sem(unsigned long ip) 248 { 249 unsigned long flags; 250 251 mutex_release(&console_lock_dep_map, ip); 252 253 printk_safe_enter_irqsave(flags); 254 up(&console_sem); 255 printk_safe_exit_irqrestore(flags); 256 } 257 #define up_console_sem() __up_console_sem(_RET_IP_) 258 259 /* 260 * This is used for debugging the mess that is the VT code by 261 * keeping track if we have the console semaphore held. It's 262 * definitely not the perfect debug tool (we don't know if _WE_ 263 * hold it and are racing, but it helps tracking those weird code 264 * paths in the console code where we end up in places I want 265 * locked without the console sempahore held). 266 */ 267 static int console_locked, console_suspended; 268 269 /* 270 * If exclusive_console is non-NULL then only this console is to be printed to. 271 */ 272 static struct console *exclusive_console; 273 274 /* 275 * Array of consoles built from command line options (console=) 276 */ 277 278 #define MAX_CMDLINECONSOLES 8 279 280 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; 281 282 static int preferred_console = -1; 283 static bool has_preferred_console; 284 int console_set_on_cmdline; 285 EXPORT_SYMBOL(console_set_on_cmdline); 286 287 /* Flag: console code may call schedule() */ 288 static int console_may_schedule; 289 290 enum con_msg_format_flags { 291 MSG_FORMAT_DEFAULT = 0, 292 MSG_FORMAT_SYSLOG = (1 << 0), 293 }; 294 295 static int console_msg_format = MSG_FORMAT_DEFAULT; 296 297 /* 298 * The printk log buffer consists of a sequenced collection of records, each 299 * containing variable length message text. 
Every record also contains its
 * own meta-data (@info).
 *
 * Every record meta-data carries the timestamp in nanoseconds (@ts_nsec;
 * it is converted to microseconds when exported via /dev/kmsg), as well as
 * the standard userspace syslog level and syslog facility. The usual kernel
 * messages use LOG_KERN; userspace-injected messages always carry a matching
 * syslog facility, by default LOG_USER. The origin of every message can be
 * reliably determined that way.
 *
 * The human readable log message of a record is available in @text, the
 * length of the message text in @text_len. The stored message is not
 * terminated.
 *
 * Optionally, a record can carry a dictionary of properties (key/value
 * pairs), to provide userspace with a machine-readable message context.
 *
 * Examples for well-defined, commonly used property names are:
 *   DEVICE=b12:8               device identifier
 *                                b12:8         block dev_t
 *                                c127:3        char dev_t
 *                                n8            netdev ifindex
 *                                +sound:card0  subsystem:devname
 *   SUBSYSTEM=pci              driver-core subsystem name
 *
 * Valid characters in property names are [a-zA-Z0-9.-_]. Property names
 * and values are terminated by a '\0' character.
 *
 * Example of record values:
 *   record.text_buf                = "it's a line" (unterminated)
 *   record.info.seq                = 56
 *   record.info.ts_nsec            = 36863
 *   record.info.text_len           = 11
 *   record.info.facility           = 0 (LOG_KERN)
 *   record.info.flags              = 0
 *   record.info.level              = 3 (LOG_ERR)
 *   record.info.caller_id          = 299 (task 299)
 *   record.info.dev_info.subsystem = "pci" (terminated)
 *   record.info.dev_info.device    = "+pci:0000:00:01.0" (terminated)
 *
 * The 'struct printk_info' buffer must never be directly exported to
 * userspace, it is a kernel-private implementation detail that might
 * need to be changed in the future, when the requirements change.
 *
 * /dev/kmsg exports the structured data in the following line format:
 *   "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ...
];<message text>\n" 344 * 345 * Users of the export format should ignore possible additional values 346 * separated by ',', and find the message after the ';' character. 347 * 348 * The optional key/value pairs are attached as continuation lines starting 349 * with a space character and terminated by a newline. All possible 350 * non-prinatable characters are escaped in the "\xff" notation. 351 */ 352 353 enum log_flags { 354 LOG_NEWLINE = 2, /* text ended with a newline */ 355 LOG_CONT = 8, /* text is a fragment of a continuation line */ 356 }; 357 358 /* 359 * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken 360 * within the scheduler's rq lock. It must be released before calling 361 * console_unlock() or anything else that might wake up a process. 362 */ 363 DEFINE_RAW_SPINLOCK(logbuf_lock); 364 365 /* 366 * Helper macros to lock/unlock logbuf_lock and switch between 367 * printk-safe/unsafe modes. 368 */ 369 #define logbuf_lock_irq() \ 370 do { \ 371 printk_safe_enter_irq(); \ 372 raw_spin_lock(&logbuf_lock); \ 373 } while (0) 374 375 #define logbuf_unlock_irq() \ 376 do { \ 377 raw_spin_unlock(&logbuf_lock); \ 378 printk_safe_exit_irq(); \ 379 } while (0) 380 381 #define logbuf_lock_irqsave(flags) \ 382 do { \ 383 printk_safe_enter_irqsave(flags); \ 384 raw_spin_lock(&logbuf_lock); \ 385 } while (0) 386 387 #define logbuf_unlock_irqrestore(flags) \ 388 do { \ 389 raw_spin_unlock(&logbuf_lock); \ 390 printk_safe_exit_irqrestore(flags); \ 391 } while (0) 392 393 #ifdef CONFIG_PRINTK 394 DECLARE_WAIT_QUEUE_HEAD(log_wait); 395 /* the next printk record to read by syslog(READ) or /proc/kmsg */ 396 static u64 syslog_seq; 397 static size_t syslog_partial; 398 static bool syslog_time; 399 400 /* the next printk record to write to the console */ 401 static u64 console_seq; 402 static u64 exclusive_console_stop_seq; 403 static unsigned long console_dropped; 404 405 /* the next printk record to read after the last 'clear' command */ 406 static 
u64 clear_seq; 407 408 #ifdef CONFIG_PRINTK_CALLER 409 #define PREFIX_MAX 48 410 #else 411 #define PREFIX_MAX 32 412 #endif 413 #define LOG_LINE_MAX (1024 - PREFIX_MAX) 414 415 #define LOG_LEVEL(v) ((v) & 0x07) 416 #define LOG_FACILITY(v) ((v) >> 3 & 0xff) 417 418 /* record buffer */ 419 #define LOG_ALIGN __alignof__(unsigned long) 420 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 421 #define LOG_BUF_LEN_MAX (u32)(1 << 31) 422 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 423 static char *log_buf = __log_buf; 424 static u32 log_buf_len = __LOG_BUF_LEN; 425 426 /* 427 * Define the average message size. This only affects the number of 428 * descriptors that will be available. Underestimating is better than 429 * overestimating (too many available descriptors is better than not enough). 430 */ 431 #define PRB_AVGBITS 5 /* 32 character average length */ 432 433 #if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS 434 #error CONFIG_LOG_BUF_SHIFT value too small. 435 #endif 436 _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, 437 PRB_AVGBITS, &__log_buf[0]); 438 439 static struct printk_ringbuffer printk_rb_dynamic; 440 441 static struct printk_ringbuffer *prb = &printk_rb_static; 442 443 /* 444 * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before 445 * per_cpu_areas are initialised. This variable is set to true when 446 * it's safe to access per-CPU data. 447 */ 448 static bool __printk_percpu_data_ready __read_mostly; 449 450 bool printk_percpu_data_ready(void) 451 { 452 return __printk_percpu_data_ready; 453 } 454 455 /* Return log buffer address */ 456 char *log_buf_addr_get(void) 457 { 458 return log_buf; 459 } 460 461 /* Return log buffer size */ 462 u32 log_buf_len_get(void) 463 { 464 return log_buf_len; 465 } 466 467 /* 468 * Define how much of the log buffer we could take at maximum. The value 469 * must be greater than two. Note that only half of the buffer is available 470 * when the index points to the middle. 
471 */ 472 #define MAX_LOG_TAKE_PART 4 473 static const char trunc_msg[] = "<truncated>"; 474 475 static void truncate_msg(u16 *text_len, u16 *trunc_msg_len) 476 { 477 /* 478 * The message should not take the whole buffer. Otherwise, it might 479 * get removed too soon. 480 */ 481 u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; 482 483 if (*text_len > max_text_len) 484 *text_len = max_text_len; 485 486 /* enable the warning message (if there is room) */ 487 *trunc_msg_len = strlen(trunc_msg); 488 if (*text_len >= *trunc_msg_len) 489 *text_len -= *trunc_msg_len; 490 else 491 *trunc_msg_len = 0; 492 } 493 494 int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); 495 496 static int syslog_action_restricted(int type) 497 { 498 if (dmesg_restrict) 499 return 1; 500 /* 501 * Unless restricted, we allow "read all" and "get buffer size" 502 * for everybody. 503 */ 504 return type != SYSLOG_ACTION_READ_ALL && 505 type != SYSLOG_ACTION_SIZE_BUFFER; 506 } 507 508 static int check_syslog_permissions(int type, int source) 509 { 510 /* 511 * If this is from /proc/kmsg and we've already opened it, then we've 512 * already done the capabilities checks at open time. 513 */ 514 if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN) 515 goto ok; 516 517 if (syslog_action_restricted(type)) { 518 if (capable(CAP_SYSLOG)) 519 goto ok; 520 /* 521 * For historical reasons, accept CAP_SYS_ADMIN too, with 522 * a warning. 
523 */ 524 if (capable(CAP_SYS_ADMIN)) { 525 pr_warn_once("%s (%d): Attempt to access syslog with " 526 "CAP_SYS_ADMIN but no CAP_SYSLOG " 527 "(deprecated).\n", 528 current->comm, task_pid_nr(current)); 529 goto ok; 530 } 531 return -EPERM; 532 } 533 ok: 534 return security_syslog(type); 535 } 536 537 static void append_char(char **pp, char *e, char c) 538 { 539 if (*pp < e) 540 *(*pp)++ = c; 541 } 542 543 static ssize_t info_print_ext_header(char *buf, size_t size, 544 struct printk_info *info) 545 { 546 u64 ts_usec = info->ts_nsec; 547 char caller[20]; 548 #ifdef CONFIG_PRINTK_CALLER 549 u32 id = info->caller_id; 550 551 snprintf(caller, sizeof(caller), ",caller=%c%u", 552 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000); 553 #else 554 caller[0] = '\0'; 555 #endif 556 557 do_div(ts_usec, 1000); 558 559 return scnprintf(buf, size, "%u,%llu,%llu,%c%s;", 560 (info->facility << 3) | info->level, info->seq, 561 ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller); 562 } 563 564 static ssize_t msg_add_ext_text(char *buf, size_t size, 565 const char *text, size_t text_len, 566 unsigned char endc) 567 { 568 char *p = buf, *e = buf + size; 569 size_t i; 570 571 /* escape non-printable characters */ 572 for (i = 0; i < text_len; i++) { 573 unsigned char c = text[i]; 574 575 if (c < ' ' || c >= 127 || c == '\\') 576 p += scnprintf(p, e - p, "\\x%02x", c); 577 else 578 append_char(&p, e, c); 579 } 580 append_char(&p, e, endc); 581 582 return p - buf; 583 } 584 585 static ssize_t msg_add_dict_text(char *buf, size_t size, 586 const char *key, const char *val) 587 { 588 size_t val_len = strlen(val); 589 ssize_t len; 590 591 if (!val_len) 592 return 0; 593 594 len = msg_add_ext_text(buf, size, "", 0, ' '); /* dict prefix */ 595 len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '='); 596 len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n'); 597 598 return len; 599 } 600 601 static ssize_t msg_print_ext_body(char *buf, size_t size, 602 char *text, 
size_t text_len, 603 struct dev_printk_info *dev_info) 604 { 605 ssize_t len; 606 607 len = msg_add_ext_text(buf, size, text, text_len, '\n'); 608 609 if (!dev_info) 610 goto out; 611 612 len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM", 613 dev_info->subsystem); 614 len += msg_add_dict_text(buf + len, size - len, "DEVICE", 615 dev_info->device); 616 out: 617 return len; 618 } 619 620 /* /dev/kmsg - userspace message inject/listen interface */ 621 struct devkmsg_user { 622 u64 seq; 623 struct ratelimit_state rs; 624 struct mutex lock; 625 char buf[CONSOLE_EXT_LOG_MAX]; 626 627 struct printk_info info; 628 char text_buf[CONSOLE_EXT_LOG_MAX]; 629 struct printk_record record; 630 }; 631 632 static __printf(3, 4) __cold 633 int devkmsg_emit(int facility, int level, const char *fmt, ...) 634 { 635 va_list args; 636 int r; 637 638 va_start(args, fmt); 639 r = vprintk_emit(facility, level, NULL, fmt, args); 640 va_end(args); 641 642 return r; 643 } 644 645 static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) 646 { 647 char *buf, *line; 648 int level = default_message_loglevel; 649 int facility = 1; /* LOG_USER */ 650 struct file *file = iocb->ki_filp; 651 struct devkmsg_user *user = file->private_data; 652 size_t len = iov_iter_count(from); 653 ssize_t ret = len; 654 655 if (!user || len > LOG_LINE_MAX) 656 return -EINVAL; 657 658 /* Ignore when user logging is disabled. */ 659 if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) 660 return len; 661 662 /* Ratelimit when not explicitly enabled. */ 663 if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) { 664 if (!___ratelimit(&user->rs, current->comm)) 665 return ret; 666 } 667 668 buf = kmalloc(len+1, GFP_KERNEL); 669 if (buf == NULL) 670 return -ENOMEM; 671 672 buf[len] = '\0'; 673 if (!copy_from_iter_full(buf, len, from)) { 674 kfree(buf); 675 return -EFAULT; 676 } 677 678 /* 679 * Extract and skip the syslog prefix <[0-9]*>. 
Coming from userspace 680 * the decimal value represents 32bit, the lower 3 bit are the log 681 * level, the rest are the log facility. 682 * 683 * If no prefix or no userspace facility is specified, we 684 * enforce LOG_USER, to be able to reliably distinguish 685 * kernel-generated messages from userspace-injected ones. 686 */ 687 line = buf; 688 if (line[0] == '<') { 689 char *endp = NULL; 690 unsigned int u; 691 692 u = simple_strtoul(line + 1, &endp, 10); 693 if (endp && endp[0] == '>') { 694 level = LOG_LEVEL(u); 695 if (LOG_FACILITY(u) != 0) 696 facility = LOG_FACILITY(u); 697 endp++; 698 line = endp; 699 } 700 } 701 702 devkmsg_emit(facility, level, "%s", line); 703 kfree(buf); 704 return ret; 705 } 706 707 static ssize_t devkmsg_read(struct file *file, char __user *buf, 708 size_t count, loff_t *ppos) 709 { 710 struct devkmsg_user *user = file->private_data; 711 struct printk_record *r = &user->record; 712 size_t len; 713 ssize_t ret; 714 715 if (!user) 716 return -EBADF; 717 718 ret = mutex_lock_interruptible(&user->lock); 719 if (ret) 720 return ret; 721 722 logbuf_lock_irq(); 723 if (!prb_read_valid(prb, user->seq, r)) { 724 if (file->f_flags & O_NONBLOCK) { 725 ret = -EAGAIN; 726 logbuf_unlock_irq(); 727 goto out; 728 } 729 730 logbuf_unlock_irq(); 731 ret = wait_event_interruptible(log_wait, 732 prb_read_valid(prb, user->seq, r)); 733 if (ret) 734 goto out; 735 logbuf_lock_irq(); 736 } 737 738 if (user->seq < prb_first_valid_seq(prb)) { 739 /* our last seen message is gone, return error and reset */ 740 user->seq = prb_first_valid_seq(prb); 741 ret = -EPIPE; 742 logbuf_unlock_irq(); 743 goto out; 744 } 745 746 len = info_print_ext_header(user->buf, sizeof(user->buf), r->info); 747 len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, 748 &r->text_buf[0], r->info->text_len, 749 &r->info->dev_info); 750 751 user->seq = r->info->seq + 1; 752 logbuf_unlock_irq(); 753 754 if (len > count) { 755 ret = -EINVAL; 756 goto out; 757 } 758 759 if 
(copy_to_user(buf, user->buf, len)) { 760 ret = -EFAULT; 761 goto out; 762 } 763 ret = len; 764 out: 765 mutex_unlock(&user->lock); 766 return ret; 767 } 768 769 /* 770 * Be careful when modifying this function!!! 771 * 772 * Only few operations are supported because the device works only with the 773 * entire variable length messages (records). Non-standard values are 774 * returned in the other cases and has been this way for quite some time. 775 * User space applications might depend on this behavior. 776 */ 777 static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) 778 { 779 struct devkmsg_user *user = file->private_data; 780 loff_t ret = 0; 781 782 if (!user) 783 return -EBADF; 784 if (offset) 785 return -ESPIPE; 786 787 logbuf_lock_irq(); 788 switch (whence) { 789 case SEEK_SET: 790 /* the first record */ 791 user->seq = prb_first_valid_seq(prb); 792 break; 793 case SEEK_DATA: 794 /* 795 * The first record after the last SYSLOG_ACTION_CLEAR, 796 * like issued by 'dmesg -c'. Reading /dev/kmsg itself 797 * changes no global state, and does not clear anything. 
798 */ 799 user->seq = clear_seq; 800 break; 801 case SEEK_END: 802 /* after the last record */ 803 user->seq = prb_next_seq(prb); 804 break; 805 default: 806 ret = -EINVAL; 807 } 808 logbuf_unlock_irq(); 809 return ret; 810 } 811 812 static __poll_t devkmsg_poll(struct file *file, poll_table *wait) 813 { 814 struct devkmsg_user *user = file->private_data; 815 __poll_t ret = 0; 816 817 if (!user) 818 return EPOLLERR|EPOLLNVAL; 819 820 poll_wait(file, &log_wait, wait); 821 822 logbuf_lock_irq(); 823 if (prb_read_valid(prb, user->seq, NULL)) { 824 /* return error when data has vanished underneath us */ 825 if (user->seq < prb_first_valid_seq(prb)) 826 ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; 827 else 828 ret = EPOLLIN|EPOLLRDNORM; 829 } 830 logbuf_unlock_irq(); 831 832 return ret; 833 } 834 835 static int devkmsg_open(struct inode *inode, struct file *file) 836 { 837 struct devkmsg_user *user; 838 int err; 839 840 if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) 841 return -EPERM; 842 843 /* write-only does not need any file context */ 844 if ((file->f_flags & O_ACCMODE) != O_WRONLY) { 845 err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, 846 SYSLOG_FROM_READER); 847 if (err) 848 return err; 849 } 850 851 user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); 852 if (!user) 853 return -ENOMEM; 854 855 ratelimit_default_init(&user->rs); 856 ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE); 857 858 mutex_init(&user->lock); 859 860 prb_rec_init_rd(&user->record, &user->info, 861 &user->text_buf[0], sizeof(user->text_buf)); 862 863 logbuf_lock_irq(); 864 user->seq = prb_first_valid_seq(prb); 865 logbuf_unlock_irq(); 866 867 file->private_data = user; 868 return 0; 869 } 870 871 static int devkmsg_release(struct inode *inode, struct file *file) 872 { 873 struct devkmsg_user *user = file->private_data; 874 875 if (!user) 876 return 0; 877 878 ratelimit_state_exit(&user->rs); 879 880 mutex_destroy(&user->lock); 881 kfree(user); 882 return 0; 883 } 884 885 const 
struct file_operations kmsg_fops = { 886 .open = devkmsg_open, 887 .read = devkmsg_read, 888 .write_iter = devkmsg_write, 889 .llseek = devkmsg_llseek, 890 .poll = devkmsg_poll, 891 .release = devkmsg_release, 892 }; 893 894 #ifdef CONFIG_CRASH_CORE 895 /* 896 * This appends the listed symbols to /proc/vmcore 897 * 898 * /proc/vmcore is used by various utilities, like crash and makedumpfile to 899 * obtain access to symbols that are otherwise very difficult to locate. These 900 * symbols are specifically used so that utilities can access and extract the 901 * dmesg log from a vmcore file after a crash. 902 */ 903 void log_buf_vmcoreinfo_setup(void) 904 { 905 struct dev_printk_info *dev_info = NULL; 906 907 VMCOREINFO_SYMBOL(prb); 908 VMCOREINFO_SYMBOL(printk_rb_static); 909 VMCOREINFO_SYMBOL(clear_seq); 910 911 /* 912 * Export struct size and field offsets. User space tools can 913 * parse it and detect any changes to structure down the line. 914 */ 915 916 VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); 917 VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); 918 VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); 919 VMCOREINFO_OFFSET(printk_ringbuffer, fail); 920 921 VMCOREINFO_STRUCT_SIZE(prb_desc_ring); 922 VMCOREINFO_OFFSET(prb_desc_ring, count_bits); 923 VMCOREINFO_OFFSET(prb_desc_ring, descs); 924 VMCOREINFO_OFFSET(prb_desc_ring, infos); 925 VMCOREINFO_OFFSET(prb_desc_ring, head_id); 926 VMCOREINFO_OFFSET(prb_desc_ring, tail_id); 927 928 VMCOREINFO_STRUCT_SIZE(prb_desc); 929 VMCOREINFO_OFFSET(prb_desc, state_var); 930 VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); 931 932 VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); 933 VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); 934 VMCOREINFO_OFFSET(prb_data_blk_lpos, next); 935 936 VMCOREINFO_STRUCT_SIZE(printk_info); 937 VMCOREINFO_OFFSET(printk_info, seq); 938 VMCOREINFO_OFFSET(printk_info, ts_nsec); 939 VMCOREINFO_OFFSET(printk_info, text_len); 940 VMCOREINFO_OFFSET(printk_info, caller_id); 941 VMCOREINFO_OFFSET(printk_info, 
dev_info); 942 943 VMCOREINFO_STRUCT_SIZE(dev_printk_info); 944 VMCOREINFO_OFFSET(dev_printk_info, subsystem); 945 VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem)); 946 VMCOREINFO_OFFSET(dev_printk_info, device); 947 VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device)); 948 949 VMCOREINFO_STRUCT_SIZE(prb_data_ring); 950 VMCOREINFO_OFFSET(prb_data_ring, size_bits); 951 VMCOREINFO_OFFSET(prb_data_ring, data); 952 VMCOREINFO_OFFSET(prb_data_ring, head_lpos); 953 VMCOREINFO_OFFSET(prb_data_ring, tail_lpos); 954 955 VMCOREINFO_SIZE(atomic_long_t); 956 VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); 957 } 958 #endif 959 960 /* requested log_buf_len from kernel cmdline */ 961 static unsigned long __initdata new_log_buf_len; 962 963 /* we practice scaling the ring buffer by powers of 2 */ 964 static void __init log_buf_len_update(u64 size) 965 { 966 if (size > (u64)LOG_BUF_LEN_MAX) { 967 size = (u64)LOG_BUF_LEN_MAX; 968 pr_err("log_buf over 2G is not supported.\n"); 969 } 970 971 if (size) 972 size = roundup_pow_of_two(size); 973 if (size > log_buf_len) 974 new_log_buf_len = (unsigned long)size; 975 } 976 977 /* save requested log_buf_len since it's too early to process it */ 978 static int __init log_buf_len_setup(char *str) 979 { 980 u64 size; 981 982 if (!str) 983 return -EINVAL; 984 985 size = memparse(str, &str); 986 987 log_buf_len_update(size); 988 989 return 0; 990 } 991 early_param("log_buf_len", log_buf_len_setup); 992 993 #ifdef CONFIG_SMP 994 #define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT) 995 996 static void __init log_buf_add_cpu(void) 997 { 998 unsigned int cpu_extra; 999 1000 /* 1001 * archs should set up cpu_possible_bits properly with 1002 * set_cpu_possible() after setup_arch() but just in 1003 * case lets ensure this is valid. 
1004 */ 1005 if (num_possible_cpus() == 1) 1006 return; 1007 1008 cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN; 1009 1010 /* by default this will only continue through for large > 64 CPUs */ 1011 if (cpu_extra <= __LOG_BUF_LEN / 2) 1012 return; 1013 1014 pr_info("log_buf_len individual max cpu contribution: %d bytes\n", 1015 __LOG_CPU_MAX_BUF_LEN); 1016 pr_info("log_buf_len total cpu_extra contributions: %d bytes\n", 1017 cpu_extra); 1018 pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN); 1019 1020 log_buf_len_update(cpu_extra + __LOG_BUF_LEN); 1021 } 1022 #else /* !CONFIG_SMP */ 1023 static inline void log_buf_add_cpu(void) {} 1024 #endif /* CONFIG_SMP */ 1025 1026 static void __init set_percpu_data_ready(void) 1027 { 1028 printk_safe_init(); 1029 /* Make sure we set this flag only after printk_safe() init is done */ 1030 barrier(); 1031 __printk_percpu_data_ready = true; 1032 } 1033 1034 static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, 1035 struct printk_record *r) 1036 { 1037 struct prb_reserved_entry e; 1038 struct printk_record dest_r; 1039 1040 prb_rec_init_wr(&dest_r, r->info->text_len); 1041 1042 if (!prb_reserve(&e, rb, &dest_r)) 1043 return 0; 1044 1045 memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); 1046 dest_r.info->text_len = r->info->text_len; 1047 dest_r.info->facility = r->info->facility; 1048 dest_r.info->level = r->info->level; 1049 dest_r.info->flags = r->info->flags; 1050 dest_r.info->ts_nsec = r->info->ts_nsec; 1051 dest_r.info->caller_id = r->info->caller_id; 1052 memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info)); 1053 1054 prb_final_commit(&e); 1055 1056 return prb_record_text_space(&e); 1057 } 1058 1059 static char setup_text_buf[LOG_LINE_MAX] __initdata; 1060 1061 void __init setup_log_buf(int early) 1062 { 1063 struct printk_info *new_infos; 1064 unsigned int new_descs_count; 1065 struct prb_desc *new_descs; 1066 struct printk_info info; 1067 
struct printk_record r; 1068 size_t new_descs_size; 1069 size_t new_infos_size; 1070 unsigned long flags; 1071 char *new_log_buf; 1072 unsigned int free; 1073 u64 seq; 1074 1075 /* 1076 * Some archs call setup_log_buf() multiple times - first is very 1077 * early, e.g. from setup_arch(), and second - when percpu_areas 1078 * are initialised. 1079 */ 1080 if (!early) 1081 set_percpu_data_ready(); 1082 1083 if (log_buf != __log_buf) 1084 return; 1085 1086 if (!early && !new_log_buf_len) 1087 log_buf_add_cpu(); 1088 1089 if (!new_log_buf_len) 1090 return; 1091 1092 new_descs_count = new_log_buf_len >> PRB_AVGBITS; 1093 if (new_descs_count == 0) { 1094 pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); 1095 return; 1096 } 1097 1098 new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); 1099 if (unlikely(!new_log_buf)) { 1100 pr_err("log_buf_len: %lu text bytes not available\n", 1101 new_log_buf_len); 1102 return; 1103 } 1104 1105 new_descs_size = new_descs_count * sizeof(struct prb_desc); 1106 new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); 1107 if (unlikely(!new_descs)) { 1108 pr_err("log_buf_len: %zu desc bytes not available\n", 1109 new_descs_size); 1110 goto err_free_log_buf; 1111 } 1112 1113 new_infos_size = new_descs_count * sizeof(struct printk_info); 1114 new_infos = memblock_alloc(new_infos_size, LOG_ALIGN); 1115 if (unlikely(!new_infos)) { 1116 pr_err("log_buf_len: %zu info bytes not available\n", 1117 new_infos_size); 1118 goto err_free_descs; 1119 } 1120 1121 prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf)); 1122 1123 prb_init(&printk_rb_dynamic, 1124 new_log_buf, ilog2(new_log_buf_len), 1125 new_descs, ilog2(new_descs_count), 1126 new_infos); 1127 1128 printk_safe_enter_irqsave(flags); 1129 1130 log_buf_len = new_log_buf_len; 1131 log_buf = new_log_buf; 1132 new_log_buf_len = 0; 1133 1134 free = __LOG_BUF_LEN; 1135 prb_for_each_record(0, &printk_rb_static, seq, &r) 1136 free -= add_to_rb(&printk_rb_dynamic, &r); 
1137 1138 /* 1139 * This is early enough that everything is still running on the 1140 * boot CPU and interrupts are disabled. So no new messages will 1141 * appear during the transition to the dynamic buffer. 1142 */ 1143 prb = &printk_rb_dynamic; 1144 1145 printk_safe_exit_irqrestore(flags); 1146 1147 if (seq != prb_next_seq(&printk_rb_static)) { 1148 pr_err("dropped %llu messages\n", 1149 prb_next_seq(&printk_rb_static) - seq); 1150 } 1151 1152 pr_info("log_buf_len: %u bytes\n", log_buf_len); 1153 pr_info("early log buf free: %u(%u%%)\n", 1154 free, (free * 100) / __LOG_BUF_LEN); 1155 return; 1156 1157 err_free_descs: 1158 memblock_free(__pa(new_descs), new_descs_size); 1159 err_free_log_buf: 1160 memblock_free(__pa(new_log_buf), new_log_buf_len); 1161 } 1162 1163 static bool __read_mostly ignore_loglevel; 1164 1165 static int __init ignore_loglevel_setup(char *str) 1166 { 1167 ignore_loglevel = true; 1168 pr_info("debug: ignoring loglevel setting.\n"); 1169 1170 return 0; 1171 } 1172 1173 early_param("ignore_loglevel", ignore_loglevel_setup); 1174 module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); 1175 MODULE_PARM_DESC(ignore_loglevel, 1176 "ignore loglevel setting (prints all kernel messages to the console)"); 1177 1178 static bool suppress_message_printing(int level) 1179 { 1180 return (level >= console_loglevel && !ignore_loglevel); 1181 } 1182 1183 #ifdef CONFIG_BOOT_PRINTK_DELAY 1184 1185 static int boot_delay; /* msecs delay after each printk during bootup */ 1186 static unsigned long long loops_per_msec; /* based on boot_delay */ 1187 1188 static int __init boot_delay_setup(char *str) 1189 { 1190 unsigned long lpj; 1191 1192 lpj = preset_lpj ? 
preset_lpj : 1000000; /* some guess */ 1193 loops_per_msec = (unsigned long long)lpj / 1000 * HZ; 1194 1195 get_option(&str, &boot_delay); 1196 if (boot_delay > 10 * 1000) 1197 boot_delay = 0; 1198 1199 pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, " 1200 "HZ: %d, loops_per_msec: %llu\n", 1201 boot_delay, preset_lpj, lpj, HZ, loops_per_msec); 1202 return 0; 1203 } 1204 early_param("boot_delay", boot_delay_setup); 1205 1206 static void boot_delay_msec(int level) 1207 { 1208 unsigned long long k; 1209 unsigned long timeout; 1210 1211 if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) 1212 || suppress_message_printing(level)) { 1213 return; 1214 } 1215 1216 k = (unsigned long long)loops_per_msec * boot_delay; 1217 1218 timeout = jiffies + msecs_to_jiffies(boot_delay); 1219 while (k) { 1220 k--; 1221 cpu_relax(); 1222 /* 1223 * use (volatile) jiffies to prevent 1224 * compiler reduction; loop termination via jiffies 1225 * is secondary and may or may not happen. 1226 */ 1227 if (time_after(jiffies, timeout)) 1228 break; 1229 touch_nmi_watchdog(); 1230 } 1231 } 1232 #else 1233 static inline void boot_delay_msec(int level) 1234 { 1235 } 1236 #endif 1237 1238 static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); 1239 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); 1240 1241 static size_t print_syslog(unsigned int level, char *buf) 1242 { 1243 return sprintf(buf, "<%u>", level); 1244 } 1245 1246 static size_t print_time(u64 ts, char *buf) 1247 { 1248 unsigned long rem_nsec = do_div(ts, 1000000000); 1249 1250 return sprintf(buf, "[%5lu.%06lu]", 1251 (unsigned long)ts, rem_nsec / 1000); 1252 } 1253 1254 #ifdef CONFIG_PRINTK_CALLER 1255 static size_t print_caller(u32 id, char *buf) 1256 { 1257 char caller[12]; 1258 1259 snprintf(caller, sizeof(caller), "%c%u", 1260 id & 0x80000000 ? 
'C' : 'T', id & ~0x80000000); 1261 return sprintf(buf, "[%6s]", caller); 1262 } 1263 #else 1264 #define print_caller(id, buf) 0 1265 #endif 1266 1267 static size_t info_print_prefix(const struct printk_info *info, bool syslog, 1268 bool time, char *buf) 1269 { 1270 size_t len = 0; 1271 1272 if (syslog) 1273 len = print_syslog((info->facility << 3) | info->level, buf); 1274 1275 if (time) 1276 len += print_time(info->ts_nsec, buf + len); 1277 1278 len += print_caller(info->caller_id, buf + len); 1279 1280 if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) { 1281 buf[len++] = ' '; 1282 buf[len] = '\0'; 1283 } 1284 1285 return len; 1286 } 1287 1288 /* 1289 * Prepare the record for printing. The text is shifted within the given 1290 * buffer to avoid a need for another one. The following operations are 1291 * done: 1292 * 1293 * - Add prefix for each line. 1294 * - Add the trailing newline that has been removed in vprintk_store(). 1295 * - Drop truncated lines that do not longer fit into the buffer. 1296 * 1297 * Return: The length of the updated/prepared text, including the added 1298 * prefixes and the newline. The dropped line(s) are not counted. 1299 */ 1300 static size_t record_print_text(struct printk_record *r, bool syslog, 1301 bool time) 1302 { 1303 size_t text_len = r->info->text_len; 1304 size_t buf_size = r->text_buf_size; 1305 char *text = r->text_buf; 1306 char prefix[PREFIX_MAX]; 1307 bool truncated = false; 1308 size_t prefix_len; 1309 size_t line_len; 1310 size_t len = 0; 1311 char *next; 1312 1313 /* 1314 * If the message was truncated because the buffer was not large 1315 * enough, treat the available text as if it were the full text. 
1316 */ 1317 if (text_len > buf_size) 1318 text_len = buf_size; 1319 1320 prefix_len = info_print_prefix(r->info, syslog, time, prefix); 1321 1322 /* 1323 * @text_len: bytes of unprocessed text 1324 * @line_len: bytes of current line _without_ newline 1325 * @text: pointer to beginning of current line 1326 * @len: number of bytes prepared in r->text_buf 1327 */ 1328 for (;;) { 1329 next = memchr(text, '\n', text_len); 1330 if (next) { 1331 line_len = next - text; 1332 } else { 1333 /* Drop truncated line(s). */ 1334 if (truncated) 1335 break; 1336 line_len = text_len; 1337 } 1338 1339 /* 1340 * Truncate the text if there is not enough space to add the 1341 * prefix and a trailing newline. 1342 */ 1343 if (len + prefix_len + text_len + 1 > buf_size) { 1344 /* Drop even the current line if no space. */ 1345 if (len + prefix_len + line_len + 1 > buf_size) 1346 break; 1347 1348 text_len = buf_size - len - prefix_len - 1; 1349 truncated = true; 1350 } 1351 1352 memmove(text + prefix_len, text, text_len); 1353 memcpy(text, prefix, prefix_len); 1354 1355 len += prefix_len + line_len + 1; 1356 1357 if (text_len == line_len) { 1358 /* 1359 * Add the trailing newline removed in 1360 * vprintk_store(). 1361 */ 1362 text[prefix_len + line_len] = '\n'; 1363 break; 1364 } 1365 1366 /* 1367 * Advance beyond the added prefix and the related line with 1368 * its newline. 1369 */ 1370 text += prefix_len + line_len + 1; 1371 1372 /* 1373 * The remaining text has only decreased by the line with its 1374 * newline. 1375 * 1376 * Note that @text_len can become zero. It happens when @text 1377 * ended with a newline (either due to truncation or the 1378 * original string ending with "\n\n"). The loop is correctly 1379 * repeated and (if not truncated) an empty line with a prefix 1380 * will be prepared. 
1381 */ 1382 text_len -= line_len + 1; 1383 } 1384 1385 return len; 1386 } 1387 1388 static size_t get_record_print_text_size(struct printk_info *info, 1389 unsigned int line_count, 1390 bool syslog, bool time) 1391 { 1392 char prefix[PREFIX_MAX]; 1393 size_t prefix_len; 1394 1395 prefix_len = info_print_prefix(info, syslog, time, prefix); 1396 1397 /* 1398 * Each line will be preceded with a prefix. The intermediate 1399 * newlines are already within the text, but a final trailing 1400 * newline will be added. 1401 */ 1402 return ((prefix_len * line_count) + info->text_len + 1); 1403 } 1404 1405 static int syslog_print(char __user *buf, int size) 1406 { 1407 struct printk_info info; 1408 struct printk_record r; 1409 char *text; 1410 int len = 0; 1411 1412 text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); 1413 if (!text) 1414 return -ENOMEM; 1415 1416 prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); 1417 1418 while (size > 0) { 1419 size_t n; 1420 size_t skip; 1421 1422 logbuf_lock_irq(); 1423 if (!prb_read_valid(prb, syslog_seq, &r)) { 1424 logbuf_unlock_irq(); 1425 break; 1426 } 1427 if (r.info->seq != syslog_seq) { 1428 /* message is gone, move to next valid one */ 1429 syslog_seq = r.info->seq; 1430 syslog_partial = 0; 1431 } 1432 1433 /* 1434 * To keep reading/counting partial line consistent, 1435 * use printk_time value as of the beginning of a line. 
1436 */ 1437 if (!syslog_partial) 1438 syslog_time = printk_time; 1439 1440 skip = syslog_partial; 1441 n = record_print_text(&r, true, syslog_time); 1442 if (n - syslog_partial <= size) { 1443 /* message fits into buffer, move forward */ 1444 syslog_seq = r.info->seq + 1; 1445 n -= syslog_partial; 1446 syslog_partial = 0; 1447 } else if (!len){ 1448 /* partial read(), remember position */ 1449 n = size; 1450 syslog_partial += n; 1451 } else 1452 n = 0; 1453 logbuf_unlock_irq(); 1454 1455 if (!n) 1456 break; 1457 1458 if (copy_to_user(buf, text + skip, n)) { 1459 if (!len) 1460 len = -EFAULT; 1461 break; 1462 } 1463 1464 len += n; 1465 size -= n; 1466 buf += n; 1467 } 1468 1469 kfree(text); 1470 return len; 1471 } 1472 1473 static int syslog_print_all(char __user *buf, int size, bool clear) 1474 { 1475 struct printk_info info; 1476 unsigned int line_count; 1477 struct printk_record r; 1478 char *text; 1479 int len = 0; 1480 u64 seq; 1481 bool time; 1482 1483 text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); 1484 if (!text) 1485 return -ENOMEM; 1486 1487 time = printk_time; 1488 logbuf_lock_irq(); 1489 /* 1490 * Find first record that fits, including all following records, 1491 * into the user-provided buffer for this dump. 
1492 */ 1493 prb_for_each_info(clear_seq, prb, seq, &info, &line_count) 1494 len += get_record_print_text_size(&info, line_count, true, time); 1495 1496 /* move first record forward until length fits into the buffer */ 1497 prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { 1498 if (len <= size) 1499 break; 1500 len -= get_record_print_text_size(&info, line_count, true, time); 1501 } 1502 1503 prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); 1504 1505 len = 0; 1506 prb_for_each_record(seq, prb, seq, &r) { 1507 int textlen; 1508 1509 textlen = record_print_text(&r, true, time); 1510 1511 if (len + textlen > size) { 1512 seq--; 1513 break; 1514 } 1515 1516 logbuf_unlock_irq(); 1517 if (copy_to_user(buf + len, text, textlen)) 1518 len = -EFAULT; 1519 else 1520 len += textlen; 1521 logbuf_lock_irq(); 1522 1523 if (len < 0) 1524 break; 1525 } 1526 1527 if (clear) 1528 clear_seq = seq; 1529 logbuf_unlock_irq(); 1530 1531 kfree(text); 1532 return len; 1533 } 1534 1535 static void syslog_clear(void) 1536 { 1537 logbuf_lock_irq(); 1538 clear_seq = prb_next_seq(prb); 1539 logbuf_unlock_irq(); 1540 } 1541 1542 int do_syslog(int type, char __user *buf, int len, int source) 1543 { 1544 bool clear = false; 1545 static int saved_console_loglevel = LOGLEVEL_DEFAULT; 1546 int error; 1547 1548 error = check_syslog_permissions(type, source); 1549 if (error) 1550 return error; 1551 1552 switch (type) { 1553 case SYSLOG_ACTION_CLOSE: /* Close log */ 1554 break; 1555 case SYSLOG_ACTION_OPEN: /* Open log */ 1556 break; 1557 case SYSLOG_ACTION_READ: /* Read from log */ 1558 if (!buf || len < 0) 1559 return -EINVAL; 1560 if (!len) 1561 return 0; 1562 if (!access_ok(buf, len)) 1563 return -EFAULT; 1564 error = wait_event_interruptible(log_wait, 1565 prb_read_valid(prb, syslog_seq, NULL)); 1566 if (error) 1567 return error; 1568 error = syslog_print(buf, len); 1569 break; 1570 /* Read/clear last kernel messages */ 1571 case SYSLOG_ACTION_READ_CLEAR: 1572 clear = true; 
1573 fallthrough; 1574 /* Read last kernel messages */ 1575 case SYSLOG_ACTION_READ_ALL: 1576 if (!buf || len < 0) 1577 return -EINVAL; 1578 if (!len) 1579 return 0; 1580 if (!access_ok(buf, len)) 1581 return -EFAULT; 1582 error = syslog_print_all(buf, len, clear); 1583 break; 1584 /* Clear ring buffer */ 1585 case SYSLOG_ACTION_CLEAR: 1586 syslog_clear(); 1587 break; 1588 /* Disable logging to console */ 1589 case SYSLOG_ACTION_CONSOLE_OFF: 1590 if (saved_console_loglevel == LOGLEVEL_DEFAULT) 1591 saved_console_loglevel = console_loglevel; 1592 console_loglevel = minimum_console_loglevel; 1593 break; 1594 /* Enable logging to console */ 1595 case SYSLOG_ACTION_CONSOLE_ON: 1596 if (saved_console_loglevel != LOGLEVEL_DEFAULT) { 1597 console_loglevel = saved_console_loglevel; 1598 saved_console_loglevel = LOGLEVEL_DEFAULT; 1599 } 1600 break; 1601 /* Set level of messages printed to console */ 1602 case SYSLOG_ACTION_CONSOLE_LEVEL: 1603 if (len < 1 || len > 8) 1604 return -EINVAL; 1605 if (len < minimum_console_loglevel) 1606 len = minimum_console_loglevel; 1607 console_loglevel = len; 1608 /* Implicitly re-enable logging to console */ 1609 saved_console_loglevel = LOGLEVEL_DEFAULT; 1610 break; 1611 /* Number of chars in the log buffer */ 1612 case SYSLOG_ACTION_SIZE_UNREAD: 1613 logbuf_lock_irq(); 1614 if (syslog_seq < prb_first_valid_seq(prb)) { 1615 /* messages are gone, move to first one */ 1616 syslog_seq = prb_first_valid_seq(prb); 1617 syslog_partial = 0; 1618 } 1619 if (source == SYSLOG_FROM_PROC) { 1620 /* 1621 * Short-cut for poll(/"proc/kmsg") which simply checks 1622 * for pending data, not the size; return the count of 1623 * records, not the length. 1624 */ 1625 error = prb_next_seq(prb) - syslog_seq; 1626 } else { 1627 bool time = syslog_partial ? 
syslog_time : printk_time; 1628 struct printk_info info; 1629 unsigned int line_count; 1630 u64 seq; 1631 1632 prb_for_each_info(syslog_seq, prb, seq, &info, 1633 &line_count) { 1634 error += get_record_print_text_size(&info, line_count, 1635 true, time); 1636 time = printk_time; 1637 } 1638 error -= syslog_partial; 1639 } 1640 logbuf_unlock_irq(); 1641 break; 1642 /* Size of the log buffer */ 1643 case SYSLOG_ACTION_SIZE_BUFFER: 1644 error = log_buf_len; 1645 break; 1646 default: 1647 error = -EINVAL; 1648 break; 1649 } 1650 1651 return error; 1652 } 1653 1654 SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) 1655 { 1656 return do_syslog(type, buf, len, SYSLOG_FROM_READER); 1657 } 1658 1659 /* 1660 * Special console_lock variants that help to reduce the risk of soft-lockups. 1661 * They allow to pass console_lock to another printk() call using a busy wait. 1662 */ 1663 1664 #ifdef CONFIG_LOCKDEP 1665 static struct lockdep_map console_owner_dep_map = { 1666 .name = "console_owner" 1667 }; 1668 #endif 1669 1670 static DEFINE_RAW_SPINLOCK(console_owner_lock); 1671 static struct task_struct *console_owner; 1672 static bool console_waiter; 1673 1674 /** 1675 * console_lock_spinning_enable - mark beginning of code where another 1676 * thread might safely busy wait 1677 * 1678 * This basically converts console_lock into a spinlock. This marks 1679 * the section where the console_lock owner can not sleep, because 1680 * there may be a waiter spinning (like a spinlock). Also it must be 1681 * ready to hand over the lock at the end of the section. 
1682 */ 1683 static void console_lock_spinning_enable(void) 1684 { 1685 raw_spin_lock(&console_owner_lock); 1686 console_owner = current; 1687 raw_spin_unlock(&console_owner_lock); 1688 1689 /* The waiter may spin on us after setting console_owner */ 1690 spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); 1691 } 1692 1693 /** 1694 * console_lock_spinning_disable_and_check - mark end of code where another 1695 * thread was able to busy wait and check if there is a waiter 1696 * 1697 * This is called at the end of the section where spinning is allowed. 1698 * It has two functions. First, it is a signal that it is no longer 1699 * safe to start busy waiting for the lock. Second, it checks if 1700 * there is a busy waiter and passes the lock rights to her. 1701 * 1702 * Important: Callers lose the lock if there was a busy waiter. 1703 * They must not touch items synchronized by console_lock 1704 * in this case. 1705 * 1706 * Return: 1 if the lock rights were passed, 0 otherwise. 1707 */ 1708 static int console_lock_spinning_disable_and_check(void) 1709 { 1710 int waiter; 1711 1712 raw_spin_lock(&console_owner_lock); 1713 waiter = READ_ONCE(console_waiter); 1714 console_owner = NULL; 1715 raw_spin_unlock(&console_owner_lock); 1716 1717 if (!waiter) { 1718 spin_release(&console_owner_dep_map, _THIS_IP_); 1719 return 0; 1720 } 1721 1722 /* The waiter is now free to continue */ 1723 WRITE_ONCE(console_waiter, false); 1724 1725 spin_release(&console_owner_dep_map, _THIS_IP_); 1726 1727 /* 1728 * Hand off console_lock to waiter. The waiter will perform 1729 * the up(). After this, the waiter is the console_lock owner. 1730 */ 1731 mutex_release(&console_lock_dep_map, _THIS_IP_); 1732 return 1; 1733 } 1734 1735 /** 1736 * console_trylock_spinning - try to get console_lock by busy waiting 1737 * 1738 * This allows to busy wait for the console_lock when the current 1739 * owner is running in specially marked sections. 
It means that 1740 * the current owner is running and cannot reschedule until it 1741 * is ready to lose the lock. 1742 * 1743 * Return: 1 if we got the lock, 0 othrewise 1744 */ 1745 static int console_trylock_spinning(void) 1746 { 1747 struct task_struct *owner = NULL; 1748 bool waiter; 1749 bool spin = false; 1750 unsigned long flags; 1751 1752 if (console_trylock()) 1753 return 1; 1754 1755 printk_safe_enter_irqsave(flags); 1756 1757 raw_spin_lock(&console_owner_lock); 1758 owner = READ_ONCE(console_owner); 1759 waiter = READ_ONCE(console_waiter); 1760 if (!waiter && owner && owner != current) { 1761 WRITE_ONCE(console_waiter, true); 1762 spin = true; 1763 } 1764 raw_spin_unlock(&console_owner_lock); 1765 1766 /* 1767 * If there is an active printk() writing to the 1768 * consoles, instead of having it write our data too, 1769 * see if we can offload that load from the active 1770 * printer, and do some printing ourselves. 1771 * Go into a spin only if there isn't already a waiter 1772 * spinning, and there is an active printer, and 1773 * that active printer isn't us (recursive printk?). 1774 */ 1775 if (!spin) { 1776 printk_safe_exit_irqrestore(flags); 1777 return 0; 1778 } 1779 1780 /* We spin waiting for the owner to release us */ 1781 spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); 1782 /* Owner will clear console_waiter on hand off */ 1783 while (READ_ONCE(console_waiter)) 1784 cpu_relax(); 1785 spin_release(&console_owner_dep_map, _THIS_IP_); 1786 1787 printk_safe_exit_irqrestore(flags); 1788 /* 1789 * The owner passed the console lock to us. 1790 * Since we did not spin on console lock, annotate 1791 * this as a trylock. Otherwise lockdep will 1792 * complain. 1793 */ 1794 mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); 1795 1796 return 1; 1797 } 1798 1799 /* 1800 * Call the console drivers, asking them to write out 1801 * log_buf[start] to log_buf[end - 1]. 1802 * The console_lock must be held. 
1803 */ 1804 static void call_console_drivers(const char *ext_text, size_t ext_len, 1805 const char *text, size_t len) 1806 { 1807 static char dropped_text[64]; 1808 size_t dropped_len = 0; 1809 struct console *con; 1810 1811 trace_console_rcuidle(text, len); 1812 1813 if (!console_drivers) 1814 return; 1815 1816 if (console_dropped) { 1817 dropped_len = snprintf(dropped_text, sizeof(dropped_text), 1818 "** %lu printk messages dropped **\n", 1819 console_dropped); 1820 console_dropped = 0; 1821 } 1822 1823 for_each_console(con) { 1824 if (exclusive_console && con != exclusive_console) 1825 continue; 1826 if (!(con->flags & CON_ENABLED)) 1827 continue; 1828 if (!con->write) 1829 continue; 1830 if (!cpu_online(smp_processor_id()) && 1831 !(con->flags & CON_ANYTIME)) 1832 continue; 1833 if (con->flags & CON_EXTENDED) 1834 con->write(con, ext_text, ext_len); 1835 else { 1836 if (dropped_len) 1837 con->write(con, dropped_text, dropped_len); 1838 con->write(con, text, len); 1839 } 1840 } 1841 } 1842 1843 int printk_delay_msec __read_mostly; 1844 1845 static inline void printk_delay(void) 1846 { 1847 if (unlikely(printk_delay_msec)) { 1848 int m = printk_delay_msec; 1849 1850 while (m--) { 1851 mdelay(1); 1852 touch_nmi_watchdog(); 1853 } 1854 } 1855 } 1856 1857 static inline u32 printk_caller_id(void) 1858 { 1859 return in_task() ? task_pid_nr(current) : 1860 0x80000000 + raw_smp_processor_id(); 1861 } 1862 1863 /** 1864 * parse_prefix - Parse level and control flags. 1865 * 1866 * @text: The terminated text message. 1867 * @level: A pointer to the current level value, will be updated. 1868 * @lflags: A pointer to the current log flags, will be updated. 1869 * 1870 * @level may be NULL if the caller is not interested in the parsed value. 1871 * Otherwise the variable pointed to by @level must be set to 1872 * LOGLEVEL_DEFAULT in order to be updated with the parsed value. 1873 * 1874 * @lflags may be NULL if the caller is not interested in the parsed value. 
1875 * Otherwise the variable pointed to by @lflags will be OR'd with the parsed 1876 * value. 1877 * 1878 * Return: The length of the parsed level and control flags. 1879 */ 1880 static u16 parse_prefix(char *text, int *level, enum log_flags *lflags) 1881 { 1882 u16 prefix_len = 0; 1883 int kern_level; 1884 1885 while (*text) { 1886 kern_level = printk_get_level(text); 1887 if (!kern_level) 1888 break; 1889 1890 switch (kern_level) { 1891 case '0' ... '7': 1892 if (level && *level == LOGLEVEL_DEFAULT) 1893 *level = kern_level - '0'; 1894 break; 1895 case 'c': /* KERN_CONT */ 1896 if (lflags) 1897 *lflags |= LOG_CONT; 1898 } 1899 1900 prefix_len += 2; 1901 text += 2; 1902 } 1903 1904 return prefix_len; 1905 } 1906 1907 static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lflags, 1908 const char *fmt, va_list args) 1909 { 1910 u16 text_len; 1911 1912 text_len = vscnprintf(text, size, fmt, args); 1913 1914 /* Mark and strip a trailing newline. */ 1915 if (text_len && text[text_len - 1] == '\n') { 1916 text_len--; 1917 *lflags |= LOG_NEWLINE; 1918 } 1919 1920 /* Strip log level and control flags. 
*/ 1921 if (facility == 0) { 1922 u16 prefix_len; 1923 1924 prefix_len = parse_prefix(text, NULL, NULL); 1925 if (prefix_len) { 1926 text_len -= prefix_len; 1927 memmove(text, text + prefix_len, text_len); 1928 } 1929 } 1930 1931 return text_len; 1932 } 1933 1934 __printf(4, 0) 1935 int vprintk_store(int facility, int level, 1936 const struct dev_printk_info *dev_info, 1937 const char *fmt, va_list args) 1938 { 1939 const u32 caller_id = printk_caller_id(); 1940 struct prb_reserved_entry e; 1941 enum log_flags lflags = 0; 1942 struct printk_record r; 1943 u16 trunc_msg_len = 0; 1944 char prefix_buf[8]; 1945 u16 reserve_size; 1946 va_list args2; 1947 u16 text_len; 1948 u64 ts_nsec; 1949 1950 /* 1951 * Since the duration of printk() can vary depending on the message 1952 * and state of the ringbuffer, grab the timestamp now so that it is 1953 * close to the call of printk(). This provides a more deterministic 1954 * timestamp with respect to the caller. 1955 */ 1956 ts_nsec = local_clock(); 1957 1958 /* 1959 * The sprintf needs to come first since the syslog prefix might be 1960 * passed in as a parameter. An extra byte must be reserved so that 1961 * later the vscnprintf() into the reserved buffer has room for the 1962 * terminating '\0', which is not counted by vsnprintf(). 1963 */ 1964 va_copy(args2, args); 1965 reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1; 1966 va_end(args2); 1967 1968 if (reserve_size > LOG_LINE_MAX) 1969 reserve_size = LOG_LINE_MAX; 1970 1971 /* Extract log level or control flags. 
*/ 1972 if (facility == 0) 1973 parse_prefix(&prefix_buf[0], &level, &lflags); 1974 1975 if (level == LOGLEVEL_DEFAULT) 1976 level = default_message_loglevel; 1977 1978 if (dev_info) 1979 lflags |= LOG_NEWLINE; 1980 1981 if (lflags & LOG_CONT) { 1982 prb_rec_init_wr(&r, reserve_size); 1983 if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { 1984 text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, 1985 facility, &lflags, fmt, args); 1986 r.info->text_len += text_len; 1987 1988 if (lflags & LOG_NEWLINE) { 1989 r.info->flags |= LOG_NEWLINE; 1990 prb_final_commit(&e); 1991 } else { 1992 prb_commit(&e); 1993 } 1994 1995 return text_len; 1996 } 1997 } 1998 1999 /* 2000 * Explicitly initialize the record before every prb_reserve() call. 2001 * prb_reserve_in_last() and prb_reserve() purposely invalidate the 2002 * structure when they fail. 2003 */ 2004 prb_rec_init_wr(&r, reserve_size); 2005 if (!prb_reserve(&e, prb, &r)) { 2006 /* truncate the message if it is too long for empty buffer */ 2007 truncate_msg(&reserve_size, &trunc_msg_len); 2008 2009 prb_rec_init_wr(&r, reserve_size + trunc_msg_len); 2010 if (!prb_reserve(&e, prb, &r)) 2011 return 0; 2012 } 2013 2014 /* fill message */ 2015 text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args); 2016 if (trunc_msg_len) 2017 memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); 2018 r.info->text_len = text_len + trunc_msg_len; 2019 r.info->facility = facility; 2020 r.info->level = level & 7; 2021 r.info->flags = lflags & 0x1f; 2022 r.info->ts_nsec = ts_nsec; 2023 r.info->caller_id = caller_id; 2024 if (dev_info) 2025 memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); 2026 2027 /* A message without a trailing newline can be continued. 
*/ 2028 if (!(lflags & LOG_NEWLINE)) 2029 prb_commit(&e); 2030 else 2031 prb_final_commit(&e); 2032 2033 return (text_len + trunc_msg_len); 2034 } 2035 2036 asmlinkage int vprintk_emit(int facility, int level, 2037 const struct dev_printk_info *dev_info, 2038 const char *fmt, va_list args) 2039 { 2040 int printed_len; 2041 bool in_sched = false; 2042 unsigned long flags; 2043 2044 /* Suppress unimportant messages after panic happens */ 2045 if (unlikely(suppress_printk)) 2046 return 0; 2047 2048 if (level == LOGLEVEL_SCHED) { 2049 level = LOGLEVEL_DEFAULT; 2050 in_sched = true; 2051 } 2052 2053 boot_delay_msec(level); 2054 printk_delay(); 2055 2056 printk_safe_enter_irqsave(flags); 2057 printed_len = vprintk_store(facility, level, dev_info, fmt, args); 2058 printk_safe_exit_irqrestore(flags); 2059 2060 /* If called from the scheduler, we can not call up(). */ 2061 if (!in_sched) { 2062 /* 2063 * Disable preemption to avoid being preempted while holding 2064 * console_sem which would prevent anyone from printing to 2065 * console 2066 */ 2067 preempt_disable(); 2068 /* 2069 * Try to acquire and then immediately release the console 2070 * semaphore. The release will print out buffers and wake up 2071 * /dev/kmsg and syslog() users. 2072 */ 2073 if (console_trylock_spinning()) 2074 console_unlock(); 2075 preempt_enable(); 2076 } 2077 2078 wake_up_klogd(); 2079 return printed_len; 2080 } 2081 EXPORT_SYMBOL(vprintk_emit); 2082 2083 asmlinkage int vprintk(const char *fmt, va_list args) 2084 { 2085 return vprintk_func(fmt, args); 2086 } 2087 EXPORT_SYMBOL(vprintk); 2088 2089 int vprintk_default(const char *fmt, va_list args) 2090 { 2091 return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); 2092 } 2093 EXPORT_SYMBOL_GPL(vprintk_default); 2094 2095 /** 2096 * printk - print a kernel message 2097 * @fmt: format string 2098 * 2099 * This is printk(). It can be called from any context. We want it to work. 2100 * 2101 * We try to grab the console_lock. 
If we succeed, it's easy - we log the 2102 * output and call the console drivers. If we fail to get the semaphore, we 2103 * place the output into the log buffer and return. The current holder of 2104 * the console_sem will notice the new output in console_unlock(); and will 2105 * send it to the consoles before releasing the lock. 2106 * 2107 * One effect of this deferred printing is that code which calls printk() and 2108 * then changes console_loglevel may break. This is because console_loglevel 2109 * is inspected when the actual printing occurs. 2110 * 2111 * See also: 2112 * printf(3) 2113 * 2114 * See the vsnprintf() documentation for format string extensions over C99. 2115 */ 2116 asmlinkage __visible int printk(const char *fmt, ...) 2117 { 2118 va_list args; 2119 int r; 2120 2121 va_start(args, fmt); 2122 r = vprintk_func(fmt, args); 2123 va_end(args); 2124 2125 return r; 2126 } 2127 EXPORT_SYMBOL(printk); 2128 2129 #else /* CONFIG_PRINTK */ 2130 2131 #define LOG_LINE_MAX 0 2132 #define PREFIX_MAX 0 2133 #define printk_time false 2134 2135 #define prb_read_valid(rb, seq, r) false 2136 #define prb_first_valid_seq(rb) 0 2137 2138 static u64 syslog_seq; 2139 static u64 console_seq; 2140 static u64 exclusive_console_stop_seq; 2141 static unsigned long console_dropped; 2142 2143 static size_t record_print_text(const struct printk_record *r, 2144 bool syslog, bool time) 2145 { 2146 return 0; 2147 } 2148 static ssize_t info_print_ext_header(char *buf, size_t size, 2149 struct printk_info *info) 2150 { 2151 return 0; 2152 } 2153 static ssize_t msg_print_ext_body(char *buf, size_t size, 2154 char *text, size_t text_len, 2155 struct dev_printk_info *dev_info) { return 0; } 2156 static void console_lock_spinning_enable(void) { } 2157 static int console_lock_spinning_disable_and_check(void) { return 0; } 2158 static void call_console_drivers(const char *ext_text, size_t ext_len, 2159 const char *text, size_t len) {} 2160 static bool suppress_message_printing(int 
level) { return false; } 2161 2162 #endif /* CONFIG_PRINTK */ 2163 2164 #ifdef CONFIG_EARLY_PRINTK 2165 struct console *early_console; 2166 2167 asmlinkage __visible void early_printk(const char *fmt, ...) 2168 { 2169 va_list ap; 2170 char buf[512]; 2171 int n; 2172 2173 if (!early_console) 2174 return; 2175 2176 va_start(ap, fmt); 2177 n = vscnprintf(buf, sizeof(buf), fmt, ap); 2178 va_end(ap); 2179 2180 early_console->write(early_console, buf, n); 2181 } 2182 #endif 2183 2184 static int __add_preferred_console(char *name, int idx, char *options, 2185 char *brl_options, bool user_specified) 2186 { 2187 struct console_cmdline *c; 2188 int i; 2189 2190 /* 2191 * See if this tty is not yet registered, and 2192 * if we have a slot free. 2193 */ 2194 for (i = 0, c = console_cmdline; 2195 i < MAX_CMDLINECONSOLES && c->name[0]; 2196 i++, c++) { 2197 if (strcmp(c->name, name) == 0 && c->index == idx) { 2198 if (!brl_options) 2199 preferred_console = i; 2200 if (user_specified) 2201 c->user_specified = true; 2202 return 0; 2203 } 2204 } 2205 if (i == MAX_CMDLINECONSOLES) 2206 return -E2BIG; 2207 if (!brl_options) 2208 preferred_console = i; 2209 strlcpy(c->name, name, sizeof(c->name)); 2210 c->options = options; 2211 c->user_specified = user_specified; 2212 braille_set_options(c, brl_options); 2213 2214 c->index = idx; 2215 return 0; 2216 } 2217 2218 static int __init console_msg_format_setup(char *str) 2219 { 2220 if (!strcmp(str, "syslog")) 2221 console_msg_format = MSG_FORMAT_SYSLOG; 2222 if (!strcmp(str, "default")) 2223 console_msg_format = MSG_FORMAT_DEFAULT; 2224 return 1; 2225 } 2226 __setup("console_msg_format=", console_msg_format_setup); 2227 2228 /* 2229 * Set up a console. Called via do_early_param() in init/main.c 2230 * for each "console=" parameter in the boot command line. 
2231 */ 2232 static int __init console_setup(char *str) 2233 { 2234 char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ 2235 char *s, *options, *brl_options = NULL; 2236 int idx; 2237 2238 /* 2239 * console="" or console=null have been suggested as a way to 2240 * disable console output. Use ttynull that has been created 2241 * for exacly this purpose. 2242 */ 2243 if (str[0] == 0 || strcmp(str, "null") == 0) { 2244 __add_preferred_console("ttynull", 0, NULL, NULL, true); 2245 return 1; 2246 } 2247 2248 if (_braille_console_setup(&str, &brl_options)) 2249 return 1; 2250 2251 /* 2252 * Decode str into name, index, options. 2253 */ 2254 if (str[0] >= '0' && str[0] <= '9') { 2255 strcpy(buf, "ttyS"); 2256 strncpy(buf + 4, str, sizeof(buf) - 5); 2257 } else { 2258 strncpy(buf, str, sizeof(buf) - 1); 2259 } 2260 buf[sizeof(buf) - 1] = 0; 2261 options = strchr(str, ','); 2262 if (options) 2263 *(options++) = 0; 2264 #ifdef __sparc__ 2265 if (!strcmp(str, "ttya")) 2266 strcpy(buf, "ttyS0"); 2267 if (!strcmp(str, "ttyb")) 2268 strcpy(buf, "ttyS1"); 2269 #endif 2270 for (s = buf; *s; s++) 2271 if (isdigit(*s) || *s == ',') 2272 break; 2273 idx = simple_strtoul(s, NULL, 10); 2274 *s = 0; 2275 2276 __add_preferred_console(buf, idx, options, brl_options, true); 2277 console_set_on_cmdline = 1; 2278 return 1; 2279 } 2280 __setup("console=", console_setup); 2281 2282 /** 2283 * add_preferred_console - add a device to the list of preferred consoles. 2284 * @name: device name 2285 * @idx: device index 2286 * @options: options for this console 2287 * 2288 * The last preferred console added will be used for kernel messages 2289 * and stdin/out/err for init. Normally this is used by console_setup 2290 * above to handle user-supplied console arguments; however it can also 2291 * be used by arch-specific code either to override the user or more 2292 * commonly to provide a default console (ie from PROM variables) when 2293 * the user has not supplied one. 
2294 */ 2295 int add_preferred_console(char *name, int idx, char *options) 2296 { 2297 return __add_preferred_console(name, idx, options, NULL, false); 2298 } 2299 2300 bool console_suspend_enabled = true; 2301 EXPORT_SYMBOL(console_suspend_enabled); 2302 2303 static int __init console_suspend_disable(char *str) 2304 { 2305 console_suspend_enabled = false; 2306 return 1; 2307 } 2308 __setup("no_console_suspend", console_suspend_disable); 2309 module_param_named(console_suspend, console_suspend_enabled, 2310 bool, S_IRUGO | S_IWUSR); 2311 MODULE_PARM_DESC(console_suspend, "suspend console during suspend" 2312 " and hibernate operations"); 2313 2314 /** 2315 * suspend_console - suspend the console subsystem 2316 * 2317 * This disables printk() while we go into suspend states 2318 */ 2319 void suspend_console(void) 2320 { 2321 if (!console_suspend_enabled) 2322 return; 2323 pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); 2324 console_lock(); 2325 console_suspended = 1; 2326 up_console_sem(); 2327 } 2328 2329 void resume_console(void) 2330 { 2331 if (!console_suspend_enabled) 2332 return; 2333 down_console_sem(); 2334 console_suspended = 0; 2335 console_unlock(); 2336 } 2337 2338 /** 2339 * console_cpu_notify - print deferred console messages after CPU hotplug 2340 * @cpu: unused 2341 * 2342 * If printk() is called from a CPU that is not online yet, the messages 2343 * will be printed on the console only if there are CON_ANYTIME consoles. 2344 * This function is called when a new CPU comes online (or fails to come 2345 * up) or goes offline. 2346 */ 2347 static int console_cpu_notify(unsigned int cpu) 2348 { 2349 if (!cpuhp_tasks_frozen) { 2350 /* If trylock fails, someone else is doing the printing */ 2351 if (console_trylock()) 2352 console_unlock(); 2353 } 2354 return 0; 2355 } 2356 2357 /** 2358 * console_lock - lock the console system for exclusive use. 
2359 * 2360 * Acquires a lock which guarantees that the caller has 2361 * exclusive access to the console system and the console_drivers list. 2362 * 2363 * Can sleep, returns nothing. 2364 */ 2365 void console_lock(void) 2366 { 2367 might_sleep(); 2368 2369 down_console_sem(); 2370 if (console_suspended) 2371 return; 2372 console_locked = 1; 2373 console_may_schedule = 1; 2374 } 2375 EXPORT_SYMBOL(console_lock); 2376 2377 /** 2378 * console_trylock - try to lock the console system for exclusive use. 2379 * 2380 * Try to acquire a lock which guarantees that the caller has exclusive 2381 * access to the console system and the console_drivers list. 2382 * 2383 * returns 1 on success, and 0 on failure to acquire the lock. 2384 */ 2385 int console_trylock(void) 2386 { 2387 if (down_trylock_console_sem()) 2388 return 0; 2389 if (console_suspended) { 2390 up_console_sem(); 2391 return 0; 2392 } 2393 console_locked = 1; 2394 console_may_schedule = 0; 2395 return 1; 2396 } 2397 EXPORT_SYMBOL(console_trylock); 2398 2399 int is_console_locked(void) 2400 { 2401 return console_locked; 2402 } 2403 EXPORT_SYMBOL(is_console_locked); 2404 2405 /* 2406 * Check if we have any console that is capable of printing while cpu is 2407 * booting or shutting down. Requires console_sem. 2408 */ 2409 static int have_callable_console(void) 2410 { 2411 struct console *con; 2412 2413 for_each_console(con) 2414 if ((con->flags & CON_ENABLED) && 2415 (con->flags & CON_ANYTIME)) 2416 return 1; 2417 2418 return 0; 2419 } 2420 2421 /* 2422 * Can we actually use the console at this time on this cpu? 2423 * 2424 * Console drivers may assume that per-cpu resources have been allocated. So 2425 * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't 2426 * call them until this CPU is officially up. 
 *
 * Returns nonzero if the current CPU is online or, failing that, if
 * have_callable_console() reports a usable console.
 */
static inline int can_use_console(void)
{
	return cpu_online(raw_smp_processor_id()) || have_callable_console();
}

/**
 * console_unlock - unlock the console system
 *
 * Releases the console_lock which the caller holds on the console system
 * and the console driver list.
 *
 * While the console_lock was held, console output may have been buffered
 * by printk().  If this is the case, console_unlock(); emits
 * the output prior to releasing the lock.
 *
 * If there is output waiting, we wake /dev/kmsg and syslog() users.
 * NOTE(review): no wake-up call is visible in this function body - confirm
 * whether the sentence above is still accurate.
 *
 * console_unlock(); may be called from any context.
 */
void console_unlock(void)
{
	static char ext_text[CONSOLE_EXT_LOG_MAX];
	static char text[LOG_LINE_MAX + PREFIX_MAX];
	unsigned long flags;
	bool do_cond_resched, retry;
	struct printk_info info;
	struct printk_record r;

	/* While suspended, only drop the semaphore; nothing is printed. */
	if (console_suspended) {
		up_console_sem();
		return;
	}

	/* Records are read into the static @text buffer via @r / @info. */
	prb_rec_init_rd(&r, &info, text, sizeof(text));

	/*
	 * Console drivers are called with interrupts disabled, so
	 * @console_may_schedule should be cleared before; however, we may
	 * end up dumping a lot of lines, for example, if called from
	 * console registration path, and should invoke cond_resched()
	 * between lines if allowable.  Not doing so can cause a very long
	 * scheduling stall on a slow console leading to RCU stall and
	 * softlockup warnings which exacerbate the issue with more
	 * messages practically incapacitating the system.
	 *
	 * console_trylock() is not able to detect the preemptive
	 * context reliably. Therefore the value must be stored before
	 * and cleared after the "again" goto label.
	 */
	do_cond_resched = console_may_schedule;
again:
	console_may_schedule = 0;

	/*
	 * We released the console_sem lock, so we need to recheck if
	 * cpu is online and (if not) is there at least one CON_ANYTIME
	 * console.
	 */
	if (!can_use_console()) {
		console_locked = 0;
		up_console_sem();
		return;
	}

	for (;;) {
		size_t ext_len = 0;
		size_t len;

		printk_safe_enter_irqsave(flags);
		raw_spin_lock(&logbuf_lock);
skip:
		/*
		 * Nothing left to read: leave the loop with logbuf_lock
		 * still held and the printk_safe/irq state still entered.
		 * Both are undone after the loop.
		 */
		if (!prb_read_valid(prb, console_seq, &r))
			break;

		/*
		 * The record that was read is newer than the one asked for:
		 * count the gap as dropped and continue from the new
		 * sequence number.
		 */
		if (console_seq != r.info->seq) {
			console_dropped += r.info->seq - console_seq;
			console_seq = r.info->seq;
		}

		if (suppress_message_printing(r.info->level)) {
			/*
			 * Skip record we have buffered and already printed
			 * directly to the console when we received it, and
			 * record that has level above the console loglevel.
			 */
			console_seq++;
			goto skip;
		}

		/* Output to all consoles once old messages replayed. */
		if (unlikely(exclusive_console &&
			     console_seq >= exclusive_console_stop_seq)) {
			exclusive_console = NULL;
		}

		/*
		 * Handle extended console text first because later
		 * record_print_text() will modify the record buffer in-place.
		 */
		if (nr_ext_console_drivers) {
			ext_len = info_print_ext_header(ext_text,
						sizeof(ext_text),
						r.info);
			ext_len += msg_print_ext_body(ext_text + ext_len,
						sizeof(ext_text) - ext_len,
						&r.text_buf[0],
						r.info->text_len,
						&r.info->dev_info);
		}
		len = record_print_text(&r,
				console_msg_format & MSG_FORMAT_SYSLOG,
				printk_time);
		console_seq++;
		raw_spin_unlock(&logbuf_lock);

		/*
		 * While actively printing out messages, if another printk()
		 * were to occur on another CPU, it may wait for this one to
		 * finish. This task can not be preempted if there is a
		 * waiter waiting to take over.
		 */
		console_lock_spinning_enable();

		stop_critical_timings();	/* don't trace print latency */
		call_console_drivers(ext_text, ext_len, text, len);
		start_critical_timings();

		/*
		 * Returns here without clearing console_locked or calling
		 * up_console_sem().
		 * NOTE(review): presumably a spinning waiter has taken over
		 * the console lock at this point - confirm against
		 * console_lock_spinning_disable_and_check().
		 */
		if (console_lock_spinning_disable_and_check()) {
			printk_safe_exit_irqrestore(flags);
			return;
		}

		printk_safe_exit_irqrestore(flags);

		if (do_cond_resched)
			cond_resched();
	}

	console_locked = 0;

	/* Release the logbuf_lock that was still held when the loop broke. */
	raw_spin_unlock(&logbuf_lock);

	up_console_sem();

	/*
	 * Someone could have filled up the buffer again, so re-check if there's
	 * something to flush. In case we cannot trylock the console_sem again,
	 * there's a new owner and the console_unlock() from them will do the
	 * flush, no worries.
	 */
	raw_spin_lock(&logbuf_lock);
	retry = prb_read_valid(prb, console_seq, NULL);
	raw_spin_unlock(&logbuf_lock);
	/* Pairs with the printk_safe_enter_irqsave() of the last iteration. */
	printk_safe_exit_irqrestore(flags);

	if (retry && console_trylock())
		goto again;
}
EXPORT_SYMBOL(console_unlock);

/**
 * console_conditional_schedule - yield the CPU if required
 *
 * If the console code is currently allowed to sleep, and
 * if this CPU should yield the CPU to another task, do
 * so here.
 *
 * Must be called within console_lock();.
 */
void __sched console_conditional_schedule(void)
{
	if (console_may_schedule)
		cond_resched();
}
EXPORT_SYMBOL(console_conditional_schedule);

/*
 * Call the unblank() hook of every enabled console that provides one,
 * then release the console lock through console_unlock().
 */
void console_unblank(void)
{
	struct console *c;

	/*
	 * console_unblank can no longer be called in interrupt context unless
	 * oops_in_progress is set to 1..
	 */
	if (oops_in_progress) {
		/* Give up silently if the semaphore cannot be taken now. */
		if (down_trylock_console_sem() != 0)
			return;
	} else
		console_lock();

	/*
	 * The oops path took the bare semaphore, so the lock state is set
	 * by hand here; on both paths scheduling is disallowed from now on.
	 */
	console_locked = 1;
	console_may_schedule = 0;
	for_each_console(c)
		if ((c->flags & CON_ENABLED) && c->unblank)
			c->unblank();
	console_unlock();
}

/**
 * console_flush_on_panic - flush console content on panic
 * @mode: flush all messages in buffer or just the pending ones
 *
 * Immediately output all pending messages no matter what.
 */
void console_flush_on_panic(enum con_flush_mode mode)
{
	/*
	 * If someone else is holding the console lock, trylock will fail
	 * and may_schedule may be set.  Ignore and proceed to unlock so
	 * that messages are flushed out.  As this can be called from any
	 * context and we don't want to get preempted while flushing,
	 * ensure may_schedule is cleared.
	 */
	console_trylock();
	console_may_schedule = 0;

	/* Replay everything: rewind console_seq to the oldest valid record. */
	if (mode == CONSOLE_REPLAY_ALL) {
		unsigned long flags;

		logbuf_lock_irqsave(flags);
		console_seq = prb_first_valid_seq(prb);
		logbuf_unlock_irqrestore(flags);
	}
	console_unlock();
}

/*
 * Return the console tty driver structure and its associated index
 *
 * Walks the console list under console_lock() and returns the first
 * non-NULL result of a console's device() callback, or NULL if no
 * console supplies one.
 */
struct tty_driver *console_device(int *index)
{
	struct console *c;
	struct tty_driver *driver = NULL;

	console_lock();
	for_each_console(c) {
		if (!c->device)
			continue;
		driver = c->device(c, index);
		if (driver)
			break;
	}
	console_unlock();
	return driver;
}

/*
 * Prevent further output on the passed console device so that (for example)
 * serial drivers can disable console output before suspending a port, and can
 * re-enable output afterwards.
 */
void console_stop(struct console *console)
{
	console_lock();
	console->flags &= ~CON_ENABLED;
	console_unlock();
}
EXPORT_SYMBOL(console_stop);

/*
 * Counterpart of console_stop(): set CON_ENABLED on @console again while
 * holding the console lock.
 */
void console_start(struct console *console)
{
	console_lock();
	console->flags |= CON_ENABLED;
	console_unlock();
}
EXPORT_SYMBOL(console_start);

/* Non-zero once "keep_bootcon" was seen on the command line. */
static int __read_mostly keep_bootcon;

static int __init keep_bootcon_setup(char *str)
{
	keep_bootcon = 1;
	pr_info("debug: skip boot console de-registration.\n");

	return 0;
}

early_param("keep_bootcon", keep_bootcon_setup);

/*
 * This is called by register_console() to try to match
 * the newly registered console with any of the ones selected
 * by either the command line or add_preferred_console() and
 * setup/enable it.
 *
 * Care need to be taken with consoles that are statically
 * enabled such as netconsole
 *
 * Only console_cmdline[] entries whose user_specified flag equals
 * @user_specified are considered.
 *
 * Returns 0 when the console was enabled, was taken over by the braille
 * code, or was already enabled and is accepted as such; the error from
 * newcon->setup() if that failed; -ENOENT when nothing matched.
 */
static int try_enable_new_console(struct console *newcon, bool user_specified)
{
	struct console_cmdline *c;
	int i, err;

	for (i = 0, c = console_cmdline;
	     i < MAX_CMDLINECONSOLES && c->name[0];
	     i++, c++) {
		if (c->user_specified != user_specified)
			continue;
		/*
		 * A console with its own match() that returns 0 skips the
		 * default matching and setup() below entirely.
		 */
		if (!newcon->match ||
		    newcon->match(newcon, c->name, c->index, c->options) != 0) {
			/* default matching */
			BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
			if (strcmp(c->name, newcon->name) != 0)
				continue;
			if (newcon->index >= 0 &&
			    newcon->index != c->index)
				continue;
			/* A negative index adopts the requested one. */
			if (newcon->index < 0)
				newcon->index = c->index;

			if (_braille_register_console(newcon, c))
				return 0;

			if (newcon->setup &&
			    (err = newcon->setup(newcon, c->options)) != 0)
				return err;
		}
		newcon->flags |= CON_ENABLED;
		if (i == preferred_console) {
			newcon->flags |= CON_CONSDEV;
			has_preferred_console = true;
		}
		return 0;
	}

	/*
	 * Some consoles, such as pstore and netconsole, can be enabled even
	 * without matching. Accept the pre-enabled consoles only when match()
	 * and setup() had a chance to be called.
	 *
	 * NOTE(review): @c is whatever the loop stopped on.  If the loop ran
	 * through all MAX_CMDLINECONSOLES entries, @c has been advanced past
	 * the last entry it examined - confirm that console_cmdline[] can
	 * never be completely full here, or guard this dereference.
	 */
	if (newcon->flags & CON_ENABLED && c->user_specified ==	user_specified)
		return 0;

	return -ENOENT;
}

/*
 * The console driver calls this routine during kernel initialization
 * to register the console printing procedure with printk() and to
 * print any messages that were printed by the kernel before the
 * console driver was initialized.
 *
 * This can happen pretty early during the boot process (because of
 * early_printk) - sometimes before setup_arch() completes - be careful
 * of what kernel features are used - they may not be initialised yet.
 *
 * There are two types of consoles - bootconsoles (early_printk) and
 * "real" consoles (everything which is not a bootconsole) which are
 * handled differently.
 *  - Any number of bootconsoles can be registered at any time.
 *  - As soon as a "real" console is registered, all bootconsoles
 *    will be unregistered automatically.
 *  - Once a "real" console is registered, any attempt to register a
 *    bootconsoles will be rejected
 */
void register_console(struct console *newcon)
{
	unsigned long flags;
	struct console *bcon = NULL;
	int err;

	/* Refuse (with a WARN) to register the same console twice. */
	for_each_console(bcon) {
		if (WARN(bcon == newcon, "console '%s%d' already registered\n",
					 bcon->name, bcon->index))
			return;
	}

	/*
	 * before we register a new CON_BOOT console, make sure we don't
	 * already have a valid console
	 */
	if (newcon->flags & CON_BOOT) {
		for_each_console(bcon) {
			if (!(bcon->flags & CON_BOOT)) {
				pr_info("Too late to register bootconsole %s%d\n",
					newcon->name, newcon->index);
				return;
			}
		}
	}

	/*
	 * From here on @bcon is only set when the head of the console list
	 * is a boot console.
	 */
	if (console_drivers && console_drivers->flags & CON_BOOT)
		bcon = console_drivers;

	if (!has_preferred_console || bcon || !console_drivers)
		has_preferred_console = preferred_console >= 0;

	/*
	 *	See if we want to use this console driver. If we
	 *	didn't select a console we take the first one
	 *	that registers here.
	 */
	if (!has_preferred_console) {
		if (newcon->index < 0)
			newcon->index = 0;
		if (newcon->setup == NULL ||
		    newcon->setup(newcon, NULL) == 0) {
			newcon->flags |= CON_ENABLED;
			if (newcon->device) {
				newcon->flags |= CON_CONSDEV;
				has_preferred_console = true;
			}
		}
	}

	/* See if this console matches one we selected on the command line */
	err = try_enable_new_console(newcon, true);

	/* If not, try to match against the platform default(s) */
	if (err == -ENOENT)
		err = try_enable_new_console(newcon, false);

	/* printk() messages are not printed to the Braille console. */
	if (err || newcon->flags & CON_BRL)
		return;

	/*
	 * If we have a bootconsole, and are switching to a real console,
	 * don't print everything out again, since when the boot console, and
	 * the real console are the same physical device, it's annoying to
	 * see the beginning boot messages twice
	 */
	if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV))
		newcon->flags &= ~CON_PRINTBUFFER;

	/*
	 *	Put this console in the list - keep the
	 *	preferred driver at the head of the list.
	 */
	console_lock();
	if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {
		newcon->next = console_drivers;
		console_drivers = newcon;
		if (newcon->next)
			newcon->next->flags &= ~CON_CONSDEV;
		/* Ensure this flag is always set for the head of the list */
		newcon->flags |= CON_CONSDEV;
	} else {
		/* Not preferred: link in right behind the current head. */
		newcon->next = console_drivers->next;
		console_drivers->next = newcon;
	}

	if (newcon->flags & CON_EXTENDED)
		nr_ext_console_drivers++;

	if (newcon->flags & CON_PRINTBUFFER) {
		/*
		 * console_unlock(); will print out the buffered messages
		 * for us.
		 */
		logbuf_lock_irqsave(flags);
		/*
		 * We're about to replay the log buffer.  Only do this to the
		 * just-registered console to avoid excessive message spam to
		 * the already-registered consoles.
		 *
		 * Set exclusive_console with disabled interrupts to reduce
		 * race window with eventual console_flush_on_panic() that
		 * ignores console_lock.
		 */
		exclusive_console = newcon;
		exclusive_console_stop_seq = console_seq;
		console_seq = syslog_seq;
		logbuf_unlock_irqrestore(flags);
	}
	console_unlock();
	console_sysfs_notify();

	/*
	 * By unregistering the bootconsoles after we enable the real console
	 * we get the "console xxx enabled" message on all the consoles -
	 * boot consoles, real consoles, etc - this is to ensure that end
	 * users know there might be something in the kernel's log buffer that
	 * went to the bootconsole (that they do not see on the real console)
	 */
	pr_info("%sconsole [%s%d] enabled\n",
		(newcon->flags & CON_BOOT) ? "boot" : "" ,
		newcon->name, newcon->index);
	if (bcon &&
	    ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) &&
	    !keep_bootcon) {
		/* We need to iterate through all boot consoles, to make
		 * sure we print everything out, before we unregister them.
		 */
		for_each_console(bcon)
			if (bcon->flags & CON_BOOT)
				unregister_console(bcon);
	}
}
EXPORT_SYMBOL(register_console);

/*
 * Remove @console from the console list.
 *
 * The "disabled" message is printed first, before it is known whether the
 * console is registered at all.  A braille console is handled entirely by
 * _braille_unregister_console().
 *
 * Returns a negative value from the braille code as is, 0 if the braille
 * code handled the console, -ENODEV if @console is not in the list, and
 * otherwise 0 or, when the console has an exit() callback, its result.
 * CON_ENABLED is cleared on both the success and the -ENODEV path.
 */
int unregister_console(struct console *console)
{
	struct console *con;
	int res;

	pr_info("%sconsole [%s%d] disabled\n",
		(console->flags & CON_BOOT) ? "boot" : "" ,
		console->name, console->index);

	res = _braille_unregister_console(console);
	if (res < 0)
		return res;
	if (res > 0)
		return 0;

	/* Assume "not found" until the console is unlinked below. */
	res = -ENODEV;
	console_lock();
	if (console_drivers == console) {
		console_drivers=console->next;
		res = 0;
	} else {
		for_each_console(con) {
			if (con->next == console) {
				con->next = console->next;
				res = 0;
				break;
			}
		}
	}

	if (res)
		goto out_disable_unlock;

	if (console->flags & CON_EXTENDED)
		nr_ext_console_drivers--;

	/*
	 * If this isn't the last console and it has CON_CONSDEV set, we
	 * need to set it on the next preferred console.
	 */
	if (console_drivers != NULL && console->flags & CON_CONSDEV)
		console_drivers->flags |= CON_CONSDEV;

	console->flags &= ~CON_ENABLED;
	console_unlock();
	console_sysfs_notify();

	/* The exit() callback runs after the console lock was released. */
	if (console->exit)
		res = console->exit(console);

	return res;

out_disable_unlock:
	console->flags &= ~CON_ENABLED;
	console_unlock();

	return res;
}
EXPORT_SYMBOL(unregister_console);

/*
 * Initialize the console device. This is called *early*, so
 * we can't necessarily depend on lots of kernel help here.
 * Just do some early initializations, and do the complex setup
 * later.
 */
void __init console_init(void)
{
	int ret;
	initcall_t call;
	initcall_entry_t *ce;

	/* Setup the default TTY line discipline. */
	n_tty_init();

	/*
	 * set up the console device so that later boot sequences can
	 * inform about problems etc..
	 */
	ce = __con_initcall_start;
	trace_initcall_level("console");
	/*
	 * Run every entry between __con_initcall_start and
	 * __con_initcall_end; the return value is only passed on to the
	 * trace point.
	 */
	while (ce < __con_initcall_end) {
		call = initcall_from_entry(ce);
		trace_initcall_start(call);
		ret = call();
		trace_initcall_finish(call, ret);
		ce++;
	}
}

/*
 * Some boot consoles access data that is in the init section and which will
 * be discarded after the initcalls have been run. To make sure that no code
 * will access this data, unregister the boot consoles in a late initcall.
 *
 * If for some reason, such as deferred probe or the driver being a loadable
 * module, the real console hasn't registered yet at this point, there will
 * be a brief interval in which no messages are logged to the console, which
 * makes it difficult to diagnose problems that occur during this time.
 *
 * To mitigate this problem somewhat, only unregister consoles whose memory
 * intersects with the init section. Note that all other boot consoles will
 * get unregistered when the real preferred console is registered.
 */
static int __init printk_late_init(void)
{
	struct console *con;
	int ret;

	for_each_console(con) {
		if (!(con->flags & CON_BOOT))
			continue;

		/* Check addresses that might be used for enabled consoles. */
		if (init_section_intersects(con, sizeof(*con)) ||
		    init_section_contains(con->write, 0) ||
		    init_section_contains(con->read, 0) ||
		    init_section_contains(con->device, 0) ||
		    init_section_contains(con->unblank, 0) ||
		    init_section_contains(con->data, 0)) {
			/*
			 * Please, consider moving the reported consoles out
			 * of the init section.
			 */
			pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n",
				con->name, con->index);
			unregister_console(con);
		}
	}
	/*
	 * Hook console_cpu_notify() into CPU hotplug twice: once for
	 * CPUHP_PRINTK_DEAD and once for CPUHP_AP_ONLINE_DYN.  A failure is
	 * only warned about; the initcall still returns 0.
	 */
	ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
					console_cpu_notify);
	WARN_ON(ret < 0);
	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
					console_cpu_notify, NULL);
	WARN_ON(ret < 0);
	return 0;
}
late_initcall(printk_late_init);

#if defined CONFIG_PRINTK
/*
 * Delayed printk version, for scheduler-internal messages:
 */
#define PRINTK_PENDING_WAKEUP	0x01
#define PRINTK_PENDING_OUTPUT	0x02

/* Per-CPU mask of PRINTK_PENDING_* bits waiting for the irq_work below. */
static DEFINE_PER_CPU(int, printk_pending);

/*
 * irq_work callback: fetch and clear this CPU's pending bits, then flush
 * the consoles and/or wake up the log_wait waiters as requested.
 */
static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
	int pending = __this_cpu_xchg(printk_pending, 0);

	if (pending & PRINTK_PENDING_OUTPUT) {
		/* If trylock fails, someone else is doing the printing */
		if (console_trylock())
			console_unlock();
	}

	if (pending & PRINTK_PENDING_WAKEUP)
		wake_up_interruptible(&log_wait);
}

static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
	IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func);

/*
 * Request a deferred wake-up of the log_wait waiters.  Nothing is queued
 * when the per-CPU data is not ready yet or when nobody is waiting.
 */
void wake_up_klogd(void)
{
	if (!printk_percpu_data_ready())
		return;

	preempt_disable();
	if (waitqueue_active(&log_wait)) {
		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
		irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
	}
	preempt_enable();
}

/*
 * Request that the consoles are flushed later from the irq_work callback.
 * Does nothing when the per-CPU data is not ready yet.
 */
void defer_console_output(void)
{
	if (!printk_percpu_data_ready())
		return;

	preempt_disable();
	__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
	irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
	preempt_enable();
}

/*
 * Emit the message with LOGLEVEL_SCHED and request deferred console
 * output.  Returns the value of vprintk_emit().
 */
int vprintk_deferred(const char *fmt, va_list args)
{
	int r;

	r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
	defer_console_output();

	return r;
}

/* Varargs front end for vprintk_deferred(). */
int printk_deferred(const char *fmt, ...)
{
	va_list args;
	int r;

	va_start(args, fmt);
	r = vprintk_deferred(fmt, args);
	va_end(args);

	return r;
}

/*
 * printk rate limiting, lifted from the networking subsystem.
 *
 * This enforces a rate limit: not more than 10 kernel messages
 * every 5s to make a denial-of-service attack impossible.
 */
DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);

/* Apply the shared printk_ratelimit_state on behalf of @func. */
int __printk_ratelimit(const char *func)
{
	return ___ratelimit(&printk_ratelimit_state, func);
}
EXPORT_SYMBOL(__printk_ratelimit);

/**
 * printk_timed_ratelimit - caller-controlled printk ratelimiting
 * @caller_jiffies: pointer to caller's state
 * @interval_msecs: minimum interval between prints
 *
 * printk_timed_ratelimit() returns true if more than @interval_msecs
 * milliseconds have elapsed since the last time printk_timed_ratelimit()
 * returned true.
 *
 * A stored value of zero in @caller_jiffies always lets the call through.
 */
bool printk_timed_ratelimit(unsigned long *caller_jiffies,
			unsigned int interval_msecs)
{
	unsigned long elapsed = jiffies - *caller_jiffies;

	if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
		return false;

	/* Remember when the caller was last allowed to print. */
	*caller_jiffies = jiffies;
	return true;
}
EXPORT_SYMBOL(printk_timed_ratelimit);

static DEFINE_SPINLOCK(dump_list_lock);
static LIST_HEAD(dump_list);

/**
 * kmsg_dump_register - register a kernel log dumper.
 * @dumper: pointer to the kmsg_dumper structure
 *
 * Adds a kernel log dumper to the system. The dump callback in the
 * structure will be called when the kernel oopses or panics and must be
 * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
3176 */ 3177 int kmsg_dump_register(struct kmsg_dumper *dumper) 3178 { 3179 unsigned long flags; 3180 int err = -EBUSY; 3181 3182 /* The dump callback needs to be set */ 3183 if (!dumper->dump) 3184 return -EINVAL; 3185 3186 spin_lock_irqsave(&dump_list_lock, flags); 3187 /* Don't allow registering multiple times */ 3188 if (!dumper->registered) { 3189 dumper->registered = 1; 3190 list_add_tail_rcu(&dumper->list, &dump_list); 3191 err = 0; 3192 } 3193 spin_unlock_irqrestore(&dump_list_lock, flags); 3194 3195 return err; 3196 } 3197 EXPORT_SYMBOL_GPL(kmsg_dump_register); 3198 3199 /** 3200 * kmsg_dump_unregister - unregister a kmsg dumper. 3201 * @dumper: pointer to the kmsg_dumper structure 3202 * 3203 * Removes a dump device from the system. Returns zero on success and 3204 * %-EINVAL otherwise. 3205 */ 3206 int kmsg_dump_unregister(struct kmsg_dumper *dumper) 3207 { 3208 unsigned long flags; 3209 int err = -EINVAL; 3210 3211 spin_lock_irqsave(&dump_list_lock, flags); 3212 if (dumper->registered) { 3213 dumper->registered = 0; 3214 list_del_rcu(&dumper->list); 3215 err = 0; 3216 } 3217 spin_unlock_irqrestore(&dump_list_lock, flags); 3218 synchronize_rcu(); 3219 3220 return err; 3221 } 3222 EXPORT_SYMBOL_GPL(kmsg_dump_unregister); 3223 3224 static bool always_kmsg_dump; 3225 module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); 3226 3227 const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) 3228 { 3229 switch (reason) { 3230 case KMSG_DUMP_PANIC: 3231 return "Panic"; 3232 case KMSG_DUMP_OOPS: 3233 return "Oops"; 3234 case KMSG_DUMP_EMERG: 3235 return "Emergency"; 3236 case KMSG_DUMP_SHUTDOWN: 3237 return "Shutdown"; 3238 default: 3239 return "Unknown"; 3240 } 3241 } 3242 EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); 3243 3244 /** 3245 * kmsg_dump - dump kernel log to kernel message dumpers. 
3246 * @reason: the reason (oops, panic etc) for dumping 3247 * 3248 * Call each of the registered dumper's dump() callback, which can 3249 * retrieve the kmsg records with kmsg_dump_get_line() or 3250 * kmsg_dump_get_buffer(). 3251 */ 3252 void kmsg_dump(enum kmsg_dump_reason reason) 3253 { 3254 struct kmsg_dumper *dumper; 3255 unsigned long flags; 3256 3257 rcu_read_lock(); 3258 list_for_each_entry_rcu(dumper, &dump_list, list) { 3259 enum kmsg_dump_reason max_reason = dumper->max_reason; 3260 3261 /* 3262 * If client has not provided a specific max_reason, default 3263 * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set. 3264 */ 3265 if (max_reason == KMSG_DUMP_UNDEF) { 3266 max_reason = always_kmsg_dump ? KMSG_DUMP_MAX : 3267 KMSG_DUMP_OOPS; 3268 } 3269 if (reason > max_reason) 3270 continue; 3271 3272 /* initialize iterator with data about the stored records */ 3273 dumper->active = true; 3274 3275 logbuf_lock_irqsave(flags); 3276 dumper->cur_seq = clear_seq; 3277 dumper->next_seq = prb_next_seq(prb); 3278 logbuf_unlock_irqrestore(flags); 3279 3280 /* invoke dumper which will iterate over records */ 3281 dumper->dump(dumper, reason); 3282 3283 /* reset iterator */ 3284 dumper->active = false; 3285 } 3286 rcu_read_unlock(); 3287 } 3288 3289 /** 3290 * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) 3291 * @dumper: registered kmsg dumper 3292 * @syslog: include the "<4>" prefixes 3293 * @line: buffer to copy the line to 3294 * @size: maximum size of the buffer 3295 * @len: length of line placed into buffer 3296 * 3297 * Start at the beginning of the kmsg buffer, with the oldest kmsg 3298 * record, and copy one record into the provided buffer. 3299 * 3300 * Consecutive calls will return the next available record moving 3301 * towards the end of the buffer with the youngest messages. 3302 * 3303 * A return value of FALSE indicates that there are no more records to 3304 * read. 
3305 * 3306 * The function is similar to kmsg_dump_get_line(), but grabs no locks. 3307 */ 3308 bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, 3309 char *line, size_t size, size_t *len) 3310 { 3311 struct printk_info info; 3312 unsigned int line_count; 3313 struct printk_record r; 3314 size_t l = 0; 3315 bool ret = false; 3316 3317 prb_rec_init_rd(&r, &info, line, size); 3318 3319 if (!dumper->active) 3320 goto out; 3321 3322 /* Read text or count text lines? */ 3323 if (line) { 3324 if (!prb_read_valid(prb, dumper->cur_seq, &r)) 3325 goto out; 3326 l = record_print_text(&r, syslog, printk_time); 3327 } else { 3328 if (!prb_read_valid_info(prb, dumper->cur_seq, 3329 &info, &line_count)) { 3330 goto out; 3331 } 3332 l = get_record_print_text_size(&info, line_count, syslog, 3333 printk_time); 3334 3335 } 3336 3337 dumper->cur_seq = r.info->seq + 1; 3338 ret = true; 3339 out: 3340 if (len) 3341 *len = l; 3342 return ret; 3343 } 3344 3345 /** 3346 * kmsg_dump_get_line - retrieve one kmsg log line 3347 * @dumper: registered kmsg dumper 3348 * @syslog: include the "<4>" prefixes 3349 * @line: buffer to copy the line to 3350 * @size: maximum size of the buffer 3351 * @len: length of line placed into buffer 3352 * 3353 * Start at the beginning of the kmsg buffer, with the oldest kmsg 3354 * record, and copy one record into the provided buffer. 3355 * 3356 * Consecutive calls will return the next available record moving 3357 * towards the end of the buffer with the youngest messages. 3358 * 3359 * A return value of FALSE indicates that there are no more records to 3360 * read. 
3361 */ 3362 bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, 3363 char *line, size_t size, size_t *len) 3364 { 3365 unsigned long flags; 3366 bool ret; 3367 3368 logbuf_lock_irqsave(flags); 3369 ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); 3370 logbuf_unlock_irqrestore(flags); 3371 3372 return ret; 3373 } 3374 EXPORT_SYMBOL_GPL(kmsg_dump_get_line); 3375 3376 /** 3377 * kmsg_dump_get_buffer - copy kmsg log lines 3378 * @dumper: registered kmsg dumper 3379 * @syslog: include the "<4>" prefixes 3380 * @buf: buffer to copy the line to 3381 * @size: maximum size of the buffer 3382 * @len: length of line placed into buffer 3383 * 3384 * Start at the end of the kmsg buffer and fill the provided buffer 3385 * with as many of the *youngest* kmsg records that fit into it. 3386 * If the buffer is large enough, all available kmsg records will be 3387 * copied with a single call. 3388 * 3389 * Consecutive calls will fill the buffer with the next block of 3390 * available older records, not including the earlier retrieved ones. 3391 * 3392 * A return value of FALSE indicates that there are no more records to 3393 * read. 
3394 */ 3395 bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, 3396 char *buf, size_t size, size_t *len) 3397 { 3398 struct printk_info info; 3399 unsigned int line_count; 3400 struct printk_record r; 3401 unsigned long flags; 3402 u64 seq; 3403 u64 next_seq; 3404 size_t l = 0; 3405 bool ret = false; 3406 bool time = printk_time; 3407 3408 prb_rec_init_rd(&r, &info, buf, size); 3409 3410 if (!dumper->active || !buf || !size) 3411 goto out; 3412 3413 logbuf_lock_irqsave(flags); 3414 if (dumper->cur_seq < prb_first_valid_seq(prb)) { 3415 /* messages are gone, move to first available one */ 3416 dumper->cur_seq = prb_first_valid_seq(prb); 3417 } 3418 3419 /* last entry */ 3420 if (dumper->cur_seq >= dumper->next_seq) { 3421 logbuf_unlock_irqrestore(flags); 3422 goto out; 3423 } 3424 3425 /* calculate length of entire buffer */ 3426 seq = dumper->cur_seq; 3427 while (prb_read_valid_info(prb, seq, &info, &line_count)) { 3428 if (r.info->seq >= dumper->next_seq) 3429 break; 3430 l += get_record_print_text_size(&info, line_count, true, time); 3431 seq = r.info->seq + 1; 3432 } 3433 3434 /* move first record forward until length fits into the buffer */ 3435 seq = dumper->cur_seq; 3436 while (l >= size && prb_read_valid_info(prb, seq, 3437 &info, &line_count)) { 3438 if (r.info->seq >= dumper->next_seq) 3439 break; 3440 l -= get_record_print_text_size(&info, line_count, true, time); 3441 seq = r.info->seq + 1; 3442 } 3443 3444 /* last message in next interation */ 3445 next_seq = seq; 3446 3447 /* actually read text into the buffer now */ 3448 l = 0; 3449 while (prb_read_valid(prb, seq, &r)) { 3450 if (r.info->seq >= dumper->next_seq) 3451 break; 3452 3453 l += record_print_text(&r, syslog, time); 3454 3455 /* adjust record to store to remaining buffer space */ 3456 prb_rec_init_rd(&r, &info, buf + l, size - l); 3457 3458 seq = r.info->seq + 1; 3459 } 3460 3461 dumper->next_seq = next_seq; 3462 ret = true; 3463 logbuf_unlock_irqrestore(flags); 3464 out: 
3465 if (len) 3466 *len = l; 3467 return ret; 3468 } 3469 EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); 3470 3471 /** 3472 * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) 3473 * @dumper: registered kmsg dumper 3474 * 3475 * Reset the dumper's iterator so that kmsg_dump_get_line() and 3476 * kmsg_dump_get_buffer() can be called again and used multiple 3477 * times within the same dumper.dump() callback. 3478 * 3479 * The function is similar to kmsg_dump_rewind(), but grabs no locks. 3480 */ 3481 void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) 3482 { 3483 dumper->cur_seq = clear_seq; 3484 dumper->next_seq = prb_next_seq(prb); 3485 } 3486 3487 /** 3488 * kmsg_dump_rewind - reset the iterator 3489 * @dumper: registered kmsg dumper 3490 * 3491 * Reset the dumper's iterator so that kmsg_dump_get_line() and 3492 * kmsg_dump_get_buffer() can be called again and used multiple 3493 * times within the same dumper.dump() callback. 3494 */ 3495 void kmsg_dump_rewind(struct kmsg_dumper *dumper) 3496 { 3497 unsigned long flags; 3498 3499 logbuf_lock_irqsave(flags); 3500 kmsg_dump_rewind_nolock(dumper); 3501 logbuf_unlock_irqrestore(flags); 3502 } 3503 EXPORT_SYMBOL_GPL(kmsg_dump_rewind); 3504 3505 #endif 3506