// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/kernel/printk.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 * Modified to make sys_syslog() more flexible: added commands to
 * return the last 4k of kernel messages, regardless of whether
 * they've been read or not.  Added option to suppress kernel printk's
 * to the console.  Added hook for sending the console messages
 * elsewhere, in preparation for a serial line console (someday).
 * Ted Ts'o, 2/11/93.
 * Modified for sysctl support, 1/8/97, Chris Horn.
 * Fixed SMP synchronization, 08/08/99, Manfred Spraul
 *     manfred@colorfullife.com
 * Rewrote bits to get rid of console_lock
 *	01Mar01 Andrew Morton
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/console.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/security.h>
#include <linux/memblock.h>
#include <linux/syscalls.h>
#include <linux/crash_core.h>
#include <linux/ratelimit.h>
#include <linux/kmsg_dump.h>
#include <linux/syslog.h>
#include <linux/cpu.h>
#include <linux/rculist.h>
#include <linux/poll.h>
#include <linux/irq_work.h>
#include <linux/ctype.h>
#include <linux/uio.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>

#include <linux/uaccess.h>
#include <asm/sections.h>

#include <trace/events/initcall.h>
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>

#include "printk_ringbuffer.h"
#include "console_cmdline.h"
#include "braille.h"
#include "internal.h"

int console_printk[4] = {
	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
	MESSAGE_LOGLEVEL_DEFAULT,	/* default_message_loglevel */
	CONSOLE_LOGLEVEL_MIN,		/* minimum_console_loglevel */
	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
};
EXPORT_SYMBOL_GPL(console_printk);
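
/*
 * These four values are exposed to userspace, in this order, via
 * /proc/sys/kernel/printk. A shell sketch (the defaults shown here
 * depend on the kernel configuration):
 *
 *	$ cat /proc/sys/kernel/printk
 *	4	4	1	7
 *	$ echo 8 > /proc/sys/kernel/printk	# show all messages
 */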

atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
EXPORT_SYMBOL(ignore_console_lock_warning);

/*
 * Low level drivers may need this to know whether they can schedule in
 * their unblank() callback or not. So let's export it.
 */
int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);

/*
 * console_sem protects the console_drivers list, and also
 * provides serialisation for access to the entire console
 * driver system.
 */
static DEFINE_SEMAPHORE(console_sem);
struct console *console_drivers;
EXPORT_SYMBOL_GPL(console_drivers);

/*
 * System may need to suppress printk message under certain
 * circumstances, like after kernel panic happens.
 */
int __read_mostly suppress_printk;

#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_lock_dep_map = {
	.name = "console_lock"
};
#endif

enum devkmsg_log_bits {
	__DEVKMSG_LOG_BIT_ON = 0,
	__DEVKMSG_LOG_BIT_OFF,
	__DEVKMSG_LOG_BIT_LOCK,
};

enum devkmsg_log_masks {
	DEVKMSG_LOG_MASK_ON	= BIT(__DEVKMSG_LOG_BIT_ON),
	DEVKMSG_LOG_MASK_OFF	= BIT(__DEVKMSG_LOG_BIT_OFF),
	DEVKMSG_LOG_MASK_LOCK	= BIT(__DEVKMSG_LOG_BIT_LOCK),
};

/* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */
#define DEVKMSG_LOG_MASK_DEFAULT	0

static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;

static int __control_devkmsg(char *str)
{
	size_t len;

	if (!str)
		return -EINVAL;

	len = str_has_prefix(str, "on");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_ON;
		return len;
	}

	len = str_has_prefix(str, "off");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_OFF;
		return len;
	}

	len = str_has_prefix(str, "ratelimit");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
		return len;
	}

	return -EINVAL;
}

static int __init control_devkmsg(char *str)
{
	if (__control_devkmsg(str) < 0)
		return 1;

	/*
	 * Set sysctl string accordingly:
	 */
	if (devkmsg_log == DEVKMSG_LOG_MASK_ON)
		strcpy(devkmsg_log_str, "on");
	else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF)
		strcpy(devkmsg_log_str, "off");
	/* else "ratelimit" which is set by default. */

	/*
	 * Sysctl cannot change it anymore. The kernel command line setting of
	 * this parameter forces the setting to be permanent throughout the
	 * runtime of the system. This is a precaution against userspace
	 * trying to be a smarta** and attempting to change it up on us.
	 */
	devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;

	return 0;
}
__setup("printk.devkmsg=", control_devkmsg);
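
/*
 * Usage sketch for the parameter handled above (any other value is
 * rejected):
 *
 *	printk.devkmsg={on|off|ratelimit}	kernel command line
 *	kernel.printk_devkmsg			matching sysctl; writes fail
 *						with -EINVAL once the command
 *						line has locked the setting
 */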

char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";

int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	char old_str[DEVKMSG_STR_MAX_SIZE];
	unsigned int old;
	int err;

	if (write) {
		if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK)
			return -EINVAL;

		old = devkmsg_log;
		strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE);
	}

	err = proc_dostring(table, write, buffer, lenp, ppos);
	if (err)
		return err;

	if (write) {
		err = __control_devkmsg(devkmsg_log_str);

		/*
		 * Do not accept an unknown string OR a known string with
		 * trailing crap...
		 */
		if (err < 0 || (err + 1 != *lenp)) {

			/* ... and restore old setting. */
			devkmsg_log = old;
			strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE);

			return -EINVAL;
		}
	}

	return 0;
}

/* Number of registered extended console drivers. */
static int nr_ext_console_drivers;

/*
 * Helper macros to handle lockdep when locking/unlocking console_sem. We use
 * macros instead of functions so that _RET_IP_ contains useful information.
 */
#define down_console_sem() do { \
	down(&console_sem);\
	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\
} while (0)

static int __down_trylock_console_sem(unsigned long ip)
{
	int lock_failed;
	unsigned long flags;

	/*
	 * Here and in __up_console_sem() we need to be in safe mode,
	 * because spindump/WARN/etc from under console ->lock will
	 * deadlock in printk()->down_trylock_console_sem() otherwise.
	 */
	printk_safe_enter_irqsave(flags);
	lock_failed = down_trylock(&console_sem);
	printk_safe_exit_irqrestore(flags);

	if (lock_failed)
		return 1;
	mutex_acquire(&console_lock_dep_map, 0, 1, ip);
	return 0;
}
#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)

static void __up_console_sem(unsigned long ip)
{
	unsigned long flags;

	mutex_release(&console_lock_dep_map, ip);

	printk_safe_enter_irqsave(flags);
	up(&console_sem);
	printk_safe_exit_irqrestore(flags);
}
#define up_console_sem() __up_console_sem(_RET_IP_)

/*
 * This is used for debugging the mess that is the VT code by
 * keeping track if we have the console semaphore held. It's
 * definitely not the perfect debug tool (we don't know if _WE_
 * hold it and are racing, but it helps tracking those weird code
 * paths in the console code where we end up in places I want
 * locked without the console semaphore held).
 */
static int console_locked, console_suspended;

/*
 * If exclusive_console is non-NULL then only this console is to be printed to.
 */
static struct console *exclusive_console;

/*
 *	Array of consoles built from command line options (console=)
 */

#define MAX_CMDLINECONSOLES 8

static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];

static int preferred_console = -1;
static bool has_preferred_console;
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);

/* Flag: console code may call schedule() */
static int console_may_schedule;

enum con_msg_format_flags {
	MSG_FORMAT_DEFAULT	= 0,
	MSG_FORMAT_SYSLOG	= (1 << 0),
};

static int console_msg_format = MSG_FORMAT_DEFAULT;

/*
 * The printk log buffer consists of a sequenced collection of records, each
 * containing variable length message text. Every record also contains its
 * own meta-data (@info).
 *
 * Every record meta-data carries the timestamp in nanoseconds (@ts_nsec), as
 * well as the standard userspace syslog level and syslog facility. The usual
 * kernel messages use LOG_KERN; userspace-injected messages always carry a
 * matching syslog facility, by default LOG_USER. The origin of every message
 * can be reliably determined that way.
 *
 * The human readable log message of a record is available in @text, the
 * length of the message text in @text_len. The stored message is not
 * terminated.
 *
 * Optionally, a record can carry a dictionary of properties (key/value
 * pairs), to provide userspace with a machine-readable message context.
 *
 * Examples for well-defined, commonly used property names are:
 *   DEVICE=b12:8               device identifier
 *                                b12:8         block dev_t
 *                                c127:3        char dev_t
 *                                n8            netdev ifindex
 *                                +sound:card0  subsystem:devname
 *   SUBSYSTEM=pci              driver-core subsystem name
 *
 * Valid characters in property names are [a-zA-Z0-9.-_]. Property names
 * and values are terminated by a '\0' character.
 *
 * Example of record values:
 *   record.text_buf                = "it's a line" (unterminated)
 *   record.info.seq                = 56
 *   record.info.ts_nsec            = 36863
 *   record.info.text_len           = 11
 *   record.info.facility           = 0 (LOG_KERN)
 *   record.info.flags              = 0
 *   record.info.level              = 3 (LOG_ERR)
 *   record.info.caller_id          = 299 (task 299)
 *   record.info.dev_info.subsystem = "pci" (terminated)
 *   record.info.dev_info.device    = "+pci:0000:00:01.0" (terminated)
 *
 * The 'struct printk_info' buffer must never be directly exported to
 * userspace, it is a kernel-private implementation detail that might
 * need to be changed in the future, when the requirements change.
 *
 * /dev/kmsg exports the structured data in the following line format:
 *   "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n"
 *
 * Users of the export format should ignore possible additional values
 * separated by ',', and find the message after the ';' character.
 *
 * The optional key/value pairs are attached as continuation lines starting
 * with a space character and terminated by a newline. All possible
 * non-printable characters are escaped in the "\xff" notation.
 */
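
/*
 * Putting the pieces together, a reader of /dev/kmsg would see the example
 * record above roughly as (exact fields depend on the configuration):
 *
 *	3,56,36,-;it's a line
 *	 SUBSYSTEM=pci
 *	 DEVICE=+pci:0000:00:01.0
 *
 * "3" encodes facility 0 (LOG_KERN) and level 3 (LOG_ERR), "56" is the
 * sequence number, "36" the timestamp in microseconds (36863 ns rounded
 * down) and '-' marks a record that is not a continuation fragment.
 */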

enum log_flags {
	LOG_NEWLINE	= 2,	/* text ended with a newline */
	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
};

/* syslog_lock protects syslog_* variables and write access to clear_seq. */
static DEFINE_RAW_SPINLOCK(syslog_lock);

#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static size_t syslog_partial;
static bool syslog_time;

/* All 3 protected by @console_sem. */
/* the next printk record to write to the console */
static u64 console_seq;
static u64 exclusive_console_stop_seq;
static unsigned long console_dropped;

struct latched_seq {
	seqcount_latch_t	latch;
	u64			val[2];
};

/*
 * The next printk record to read after the last 'clear' command. There are
 * two copies (updated with seqcount_latch) so that reads can locklessly
 * access a valid value. Writers are synchronized by @syslog_lock.
 */
static struct latched_seq clear_seq = {
	.latch		= SEQCNT_LATCH_ZERO(clear_seq.latch),
	.val[0]		= 0,
	.val[1]		= 0,
};

#ifdef CONFIG_PRINTK_CALLER
#define PREFIX_MAX		48
#else
#define PREFIX_MAX		32
#endif

/* the maximum size of a formatted record (i.e. with prefix added per line) */
#define CONSOLE_LOG_MAX		1024

/* the maximum size allowed to be reserved for a record */
#define LOG_LINE_MAX		(CONSOLE_LOG_MAX - PREFIX_MAX)

#define LOG_LEVEL(v)		((v) & 0x07)
#define LOG_FACILITY(v)		((v) >> 3 & 0xff)

/* record buffer */
#define LOG_ALIGN __alignof__(unsigned long)
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
#define LOG_BUF_LEN_MAX (u32)(1 << 31)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;

/*
 * Define the average message size. This only affects the number of
 * descriptors that will be available. Underestimating is better than
 * overestimating (too many available descriptors is better than not enough).
 */
#define PRB_AVGBITS 5	/* 32 character average length */

#if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS
#error CONFIG_LOG_BUF_SHIFT value too small.
#endif
_DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
		 PRB_AVGBITS, &__log_buf[0]);

static struct printk_ringbuffer printk_rb_dynamic;

static struct printk_ringbuffer *prb = &printk_rb_static;

/*
 * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
 * per_cpu_areas are initialised. This variable is set to true when
 * it's safe to access per-CPU data.
 */
static bool __printk_percpu_data_ready __read_mostly;

bool printk_percpu_data_ready(void)
{
	return __printk_percpu_data_ready;
}

/* Must be called under syslog_lock. */
static void latched_seq_write(struct latched_seq *ls, u64 val)
{
	raw_write_seqcount_latch(&ls->latch);
	ls->val[0] = val;
	raw_write_seqcount_latch(&ls->latch);
	ls->val[1] = val;
}

/* Can be called from any context. */
static u64 latched_seq_read_nolock(struct latched_seq *ls)
{
	unsigned int seq;
	unsigned int idx;
	u64 val;

	do {
		seq = raw_read_seqcount_latch(&ls->latch);
		idx = seq & 0x1;
		val = ls->val[idx];
	} while (read_seqcount_latch_retry(&ls->latch, seq));

	return val;
}

/* Return log buffer address */
char *log_buf_addr_get(void)
{
	return log_buf;
}

/* Return log buffer size */
u32 log_buf_len_get(void)
{
	return log_buf_len;
}

/*
 * Define how much of the log buffer we could take at maximum. The value
 * must be greater than two. Note that only half of the buffer is available
 * when the index points to the middle.
 */
#define MAX_LOG_TAKE_PART 4
static const char trunc_msg[] = "<truncated>";

static void truncate_msg(u16 *text_len, u16 *trunc_msg_len)
{
	/*
	 * The message should not take the whole buffer. Otherwise, it might
	 * get removed too soon.
	 */
	u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;

	if (*text_len > max_text_len)
		*text_len = max_text_len;

	/* enable the warning message (if there is room) */
	*trunc_msg_len = strlen(trunc_msg);
	if (*text_len >= *trunc_msg_len)
		*text_len -= *trunc_msg_len;
	else
		*trunc_msg_len = 0;
}

int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);

static int syslog_action_restricted(int type)
{
	if (dmesg_restrict)
		return 1;
	/*
	 * Unless restricted, we allow "read all" and "get buffer size"
	 * for everybody.
	 */
	return type != SYSLOG_ACTION_READ_ALL &&
	       type != SYSLOG_ACTION_SIZE_BUFFER;
}

static int check_syslog_permissions(int type, int source)
{
	/*
	 * If this is from /proc/kmsg and we've already opened it, then we've
	 * already done the capabilities checks at open time.
	 */
	if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN)
		goto ok;

	if (syslog_action_restricted(type)) {
		if (capable(CAP_SYSLOG))
			goto ok;
		/*
		 * For historical reasons, accept CAP_SYS_ADMIN too, with
		 * a warning.
		 */
		if (capable(CAP_SYS_ADMIN)) {
			pr_warn_once("%s (%d): Attempt to access syslog with "
				     "CAP_SYS_ADMIN but no CAP_SYSLOG "
				     "(deprecated).\n",
				     current->comm, task_pid_nr(current));
			goto ok;
		}
		return -EPERM;
	}
ok:
	return security_syslog(type);
}

static void append_char(char **pp, char *e, char c)
{
	if (*pp < e)
		*(*pp)++ = c;
}

static ssize_t info_print_ext_header(char *buf, size_t size,
				     struct printk_info *info)
{
	u64 ts_usec = info->ts_nsec;
	char caller[20];
#ifdef CONFIG_PRINTK_CALLER
	u32 id = info->caller_id;

	snprintf(caller, sizeof(caller), ",caller=%c%u",
		 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
#else
	caller[0] = '\0';
#endif

	do_div(ts_usec, 1000);

	return scnprintf(buf, size, "%u,%llu,%llu,%c%s;",
			 (info->facility << 3) | info->level, info->seq,
			 ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller);
}

static ssize_t msg_add_ext_text(char *buf, size_t size,
				const char *text, size_t text_len,
				unsigned char endc)
{
	char *p = buf, *e = buf + size;
	size_t i;

	/* escape non-printable characters */
	for (i = 0; i < text_len; i++) {
		unsigned char c = text[i];

		if (c < ' ' || c >= 127 || c == '\\')
			p += scnprintf(p, e - p, "\\x%02x", c);
		else
			append_char(&p, e, c);
	}
	append_char(&p, e, endc);

	return p - buf;
}

static ssize_t msg_add_dict_text(char *buf, size_t size,
				 const char *key, const char *val)
{
	size_t val_len = strlen(val);
	ssize_t len;

	if (!val_len)
		return 0;

	len = msg_add_ext_text(buf, size, "", 0, ' ');	/* dict prefix */
	len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '=');
	len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n');

	return len;
}

static ssize_t msg_print_ext_body(char *buf, size_t size,
				  char *text, size_t text_len,
				  struct dev_printk_info *dev_info)
{
	ssize_t len;

	len = msg_add_ext_text(buf, size, text, text_len, '\n');

	if (!dev_info)
		goto out;

	len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM",
				 dev_info->subsystem);
	len += msg_add_dict_text(buf + len, size - len, "DEVICE",
				 dev_info->device);
out:
	return len;
}

/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
	atomic64_t seq;
	struct ratelimit_state rs;
	struct mutex lock;
	char buf[CONSOLE_EXT_LOG_MAX];

	struct printk_info info;
	char text_buf[CONSOLE_EXT_LOG_MAX];
	struct printk_record record;
};

static __printf(3, 4) __cold
int devkmsg_emit(int facility, int level, const char *fmt, ...)
{
	va_list args;
	int r;

	va_start(args, fmt);
	r = vprintk_emit(facility, level, NULL, fmt, args);
	va_end(args);

	return r;
}

static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
{
	char *buf, *line;
	int level = default_message_loglevel;
	int facility = 1;	/* LOG_USER */
	struct file *file = iocb->ki_filp;
	struct devkmsg_user *user = file->private_data;
	size_t len = iov_iter_count(from);
	ssize_t ret = len;

	if (!user || len > LOG_LINE_MAX)
		return -EINVAL;

	/* Ignore when user logging is disabled. */
	if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
		return len;

	/* Ratelimit when not explicitly enabled. */
	if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) {
		if (!___ratelimit(&user->rs, current->comm))
			return ret;
	}

	buf = kmalloc(len+1, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;

	buf[len] = '\0';
	if (!copy_from_iter_full(buf, len, from)) {
		kfree(buf);
		return -EFAULT;
	}

	/*
	 * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace
	 * the decimal value represents a 32-bit quantity: the lower 3 bits
	 * are the log level, the rest is the log facility.
	 *
	 * If no prefix or no userspace facility is specified, we
	 * enforce LOG_USER, to be able to reliably distinguish
	 * kernel-generated messages from userspace-injected ones.
	 */
	line = buf;
	if (line[0] == '<') {
		char *endp = NULL;
		unsigned int u;

		u = simple_strtoul(line + 1, &endp, 10);
		if (endp && endp[0] == '>') {
			level = LOG_LEVEL(u);
			if (LOG_FACILITY(u) != 0)
				facility = LOG_FACILITY(u);
			endp++;
			line = endp;
		}
	}

	devkmsg_emit(facility, level, "%s", line);
	kfree(buf);
	return ret;
}
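
/*
 * A minimal userspace sketch of this injection path; "<29>" selects
 * facility 3 (LOG_DAEMON) and level 5 (LOG_NOTICE), i.e. 3 << 3 | 5.
 * Without such a prefix the message is logged with LOG_USER and
 * default_message_loglevel:
 *
 *	int fd = open("/dev/kmsg", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "<29>mydaemon: started\n", 22);
 *		close(fd);
 *	}
 */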

static ssize_t devkmsg_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct devkmsg_user *user = file->private_data;
	struct printk_record *r = &user->record;
	size_t len;
	ssize_t ret;

	if (!user)
		return -EBADF;

	ret = mutex_lock_interruptible(&user->lock);
	if (ret)
		return ret;

	printk_safe_enter_irq();
	if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
		if (file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			printk_safe_exit_irq();
			goto out;
		}

		printk_safe_exit_irq();
		ret = wait_event_interruptible(log_wait,
				prb_read_valid(prb, atomic64_read(&user->seq), r));
		if (ret)
			goto out;
		printk_safe_enter_irq();
	}

	if (r->info->seq != atomic64_read(&user->seq)) {
		/* our last seen message is gone, return error and reset */
		atomic64_set(&user->seq, r->info->seq);
		ret = -EPIPE;
		printk_safe_exit_irq();
		goto out;
	}

	len = info_print_ext_header(user->buf, sizeof(user->buf), r->info);
	len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
				  &r->text_buf[0], r->info->text_len,
				  &r->info->dev_info);

	atomic64_set(&user->seq, r->info->seq + 1);
	printk_safe_exit_irq();

	if (len > count) {
		ret = -EINVAL;
		goto out;
	}

	if (copy_to_user(buf, user->buf, len)) {
		ret = -EFAULT;
		goto out;
	}
	ret = len;
out:
	mutex_unlock(&user->lock);
	return ret;
}

/*
 * Be careful when modifying this function!!!
 *
 * Only a few operations are supported because the device works only with
 * entire variable-length messages (records). Non-standard values are
 * returned in the other cases, and it has been this way for quite some
 * time. User space applications might depend on this behavior.
 */
static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
{
	struct devkmsg_user *user = file->private_data;
	loff_t ret = 0;

	if (!user)
		return -EBADF;
	if (offset)
		return -ESPIPE;

	printk_safe_enter_irq();
	switch (whence) {
	case SEEK_SET:
		/* the first record */
		atomic64_set(&user->seq, prb_first_valid_seq(prb));
		break;
	case SEEK_DATA:
		/*
		 * The first record after the last SYSLOG_ACTION_CLEAR,
		 * like issued by 'dmesg -c'. Reading /dev/kmsg itself
		 * changes no global state, and does not clear anything.
		 */
		atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
		break;
	case SEEK_END:
		/* after the last record */
		atomic64_set(&user->seq, prb_next_seq(prb));
		break;
	default:
		ret = -EINVAL;
	}
	printk_safe_exit_irq();
	return ret;
}
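
/*
 * The three supported origins as seen from userspace (a sketch; any
 * non-zero offset fails with -ESPIPE, other origins with -EINVAL):
 *
 *	lseek(fd, 0, SEEK_SET);		oldest record still in the buffer
 *	lseek(fd, 0, SEEK_DATA);	first record after the last clear
 *	lseek(fd, 0, SEEK_END);		past the newest record
 */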

static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
{
	struct devkmsg_user *user = file->private_data;
	struct printk_info info;
	__poll_t ret = 0;

	if (!user)
		return EPOLLERR|EPOLLNVAL;

	poll_wait(file, &log_wait, wait);

	printk_safe_enter_irq();
	if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
		/* return error when data has vanished underneath us */
		if (info.seq != atomic64_read(&user->seq))
			ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
		else
			ret = EPOLLIN|EPOLLRDNORM;
	}
	printk_safe_exit_irq();

	return ret;
}

static int devkmsg_open(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user;
	int err;

	if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
		return -EPERM;

	/* write-only does not need any file context */
	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
					       SYSLOG_FROM_READER);
		if (err)
			return err;
	}

	user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
	if (!user)
		return -ENOMEM;

	ratelimit_default_init(&user->rs);
	ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE);

	mutex_init(&user->lock);

	prb_rec_init_rd(&user->record, &user->info,
			&user->text_buf[0], sizeof(user->text_buf));

	printk_safe_enter_irq();
	atomic64_set(&user->seq, prb_first_valid_seq(prb));
	printk_safe_exit_irq();

	file->private_data = user;
	return 0;
}

static int devkmsg_release(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user = file->private_data;

	if (!user)
		return 0;

	ratelimit_state_exit(&user->rs);

	mutex_destroy(&user->lock);
	kfree(user);
	return 0;
}

const struct file_operations kmsg_fops = {
	.open = devkmsg_open,
	.read = devkmsg_read,
	.write_iter = devkmsg_write,
	.llseek = devkmsg_llseek,
	.poll = devkmsg_poll,
	.release = devkmsg_release,
};

#ifdef CONFIG_CRASH_CORE
/*
 * This appends the listed symbols to /proc/vmcore.
 *
 * /proc/vmcore is used by various utilities, like crash and makedumpfile to
 * obtain access to symbols that are otherwise very difficult to locate. These
 * symbols are specifically used so that utilities can access and extract the
 * dmesg log from a vmcore file after a crash.
 */
void log_buf_vmcoreinfo_setup(void)
{
	struct dev_printk_info *dev_info = NULL;

	VMCOREINFO_SYMBOL(prb);
	VMCOREINFO_SYMBOL(printk_rb_static);
	VMCOREINFO_SYMBOL(clear_seq);

	/*
	 * Export struct size and field offsets. User space tools can
	 * parse it and detect any changes to structure down the line.
	 */

	VMCOREINFO_STRUCT_SIZE(printk_ringbuffer);
	VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring);
	VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring);
	VMCOREINFO_OFFSET(printk_ringbuffer, fail);

	VMCOREINFO_STRUCT_SIZE(prb_desc_ring);
	VMCOREINFO_OFFSET(prb_desc_ring, count_bits);
	VMCOREINFO_OFFSET(prb_desc_ring, descs);
	VMCOREINFO_OFFSET(prb_desc_ring, infos);
	VMCOREINFO_OFFSET(prb_desc_ring, head_id);
	VMCOREINFO_OFFSET(prb_desc_ring, tail_id);

	VMCOREINFO_STRUCT_SIZE(prb_desc);
	VMCOREINFO_OFFSET(prb_desc, state_var);
	VMCOREINFO_OFFSET(prb_desc, text_blk_lpos);

	VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos);
	VMCOREINFO_OFFSET(prb_data_blk_lpos, begin);
	VMCOREINFO_OFFSET(prb_data_blk_lpos, next);

	VMCOREINFO_STRUCT_SIZE(printk_info);
	VMCOREINFO_OFFSET(printk_info, seq);
	VMCOREINFO_OFFSET(printk_info, ts_nsec);
	VMCOREINFO_OFFSET(printk_info, text_len);
	VMCOREINFO_OFFSET(printk_info, caller_id);
	VMCOREINFO_OFFSET(printk_info, dev_info);

	VMCOREINFO_STRUCT_SIZE(dev_printk_info);
	VMCOREINFO_OFFSET(dev_printk_info, subsystem);
	VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem));
	VMCOREINFO_OFFSET(dev_printk_info, device);
	VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device));

	VMCOREINFO_STRUCT_SIZE(prb_data_ring);
	VMCOREINFO_OFFSET(prb_data_ring, size_bits);
	VMCOREINFO_OFFSET(prb_data_ring, data);
	VMCOREINFO_OFFSET(prb_data_ring, head_lpos);
	VMCOREINFO_OFFSET(prb_data_ring, tail_lpos);

	VMCOREINFO_SIZE(atomic_long_t);
	VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter);

	VMCOREINFO_STRUCT_SIZE(latched_seq);
	VMCOREINFO_OFFSET(latched_seq, val);
}
#endif

/* requested log_buf_len from kernel cmdline */
static unsigned long __initdata new_log_buf_len;

/* we practice scaling the ring buffer by powers of 2 */
static void __init log_buf_len_update(u64 size)
{
	if (size > (u64)LOG_BUF_LEN_MAX) {
		size = (u64)LOG_BUF_LEN_MAX;
		pr_err("log_buf over 2G is not supported.\n");
	}

	if (size)
		size = roundup_pow_of_two(size);
	if (size > log_buf_len)
		new_log_buf_len = (unsigned long)size;
}

/* save requested log_buf_len since it's too early to process it */
static int __init log_buf_len_setup(char *str)
{
	u64 size;

	if (!str)
		return -EINVAL;

	size = memparse(str, &str);

	log_buf_len_update(size);

	return 0;
}
early_param("log_buf_len", log_buf_len_setup);
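
/*
 * Because the size is parsed with memparse() and rounded up to a power of
 * two, the following command line options all request a 4 MiB buffer:
 *
 *	log_buf_len=4M
 *	log_buf_len=4194304
 *	log_buf_len=3000000	(rounded up to 4194304)
 */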

#ifdef CONFIG_SMP
#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)

static void __init log_buf_add_cpu(void)
{
	unsigned int cpu_extra;

	/*
	 * archs should set up cpu_possible_bits properly with
	 * set_cpu_possible() after setup_arch() but just in
	 * case let's ensure this is valid.
	 */
	if (num_possible_cpus() == 1)
		return;

	cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;

	/* by default this only takes effect on large systems (> 64 CPUs) */
	if (cpu_extra <= __LOG_BUF_LEN / 2)
		return;

	pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
		__LOG_CPU_MAX_BUF_LEN);
	pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
		cpu_extra);
	pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);

	log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
}
#else /* !CONFIG_SMP */
static inline void log_buf_add_cpu(void) {}
#endif /* CONFIG_SMP */

static void __init set_percpu_data_ready(void)
{
	printk_safe_init();
	/* Make sure we set this flag only after printk_safe() init is done */
	barrier();
	__printk_percpu_data_ready = true;
}

static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
				     struct printk_record *r)
{
	struct prb_reserved_entry e;
	struct printk_record dest_r;

	prb_rec_init_wr(&dest_r, r->info->text_len);

	if (!prb_reserve(&e, rb, &dest_r))
		return 0;

	memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len);
	dest_r.info->text_len = r->info->text_len;
	dest_r.info->facility = r->info->facility;
	dest_r.info->level = r->info->level;
	dest_r.info->flags = r->info->flags;
	dest_r.info->ts_nsec = r->info->ts_nsec;
	dest_r.info->caller_id = r->info->caller_id;
	memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info));

	prb_final_commit(&e);

	return prb_record_text_space(&e);
}

static char setup_text_buf[LOG_LINE_MAX] __initdata;

void __init setup_log_buf(int early)
{
	struct printk_info *new_infos;
	unsigned int new_descs_count;
	struct prb_desc *new_descs;
	struct printk_info info;
	struct printk_record r;
	size_t new_descs_size;
	size_t new_infos_size;
	unsigned long flags;
	char *new_log_buf;
	unsigned int free;
	u64 seq;

	/*
	 * Some archs call setup_log_buf() multiple times - first is very
	 * early, e.g. from setup_arch(), and second - when percpu_areas
	 * are initialised.
	 */
	if (!early)
		set_percpu_data_ready();

	if (log_buf != __log_buf)
		return;

	if (!early && !new_log_buf_len)
		log_buf_add_cpu();

	if (!new_log_buf_len)
		return;

	new_descs_count = new_log_buf_len >> PRB_AVGBITS;
	if (new_descs_count == 0) {
		pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len);
		return;
	}

	new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
	if (unlikely(!new_log_buf)) {
		pr_err("log_buf_len: %lu text bytes not available\n",
		       new_log_buf_len);
		return;
	}

	new_descs_size = new_descs_count * sizeof(struct prb_desc);
	new_descs = memblock_alloc(new_descs_size, LOG_ALIGN);
	if (unlikely(!new_descs)) {
		pr_err("log_buf_len: %zu desc bytes not available\n",
		       new_descs_size);
		goto err_free_log_buf;
	}

	new_infos_size = new_descs_count * sizeof(struct printk_info);
	new_infos = memblock_alloc(new_infos_size, LOG_ALIGN);
	if (unlikely(!new_infos)) {
		pr_err("log_buf_len: %zu info bytes not available\n",
		       new_infos_size);
		goto err_free_descs;
	}

	prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf));

	prb_init(&printk_rb_dynamic,
		 new_log_buf, ilog2(new_log_buf_len),
		 new_descs, ilog2(new_descs_count),
		 new_infos);

	printk_safe_enter_irqsave(flags);

	log_buf_len = new_log_buf_len;
	log_buf = new_log_buf;
	new_log_buf_len = 0;

	free = __LOG_BUF_LEN;
	prb_for_each_record(0, &printk_rb_static, seq, &r)
		free -= add_to_rb(&printk_rb_dynamic, &r);

	/*
	 * This is early enough that everything is still running on the
	 * boot CPU and interrupts are disabled. So no new messages will
	 * appear during the transition to the dynamic buffer.
	 */
	prb = &printk_rb_dynamic;

	printk_safe_exit_irqrestore(flags);

	if (seq != prb_next_seq(&printk_rb_static)) {
		pr_err("dropped %llu messages\n",
		       prb_next_seq(&printk_rb_static) - seq);
	}

	pr_info("log_buf_len: %u bytes\n", log_buf_len);
	pr_info("early log buf free: %u(%u%%)\n",
		free, (free * 100) / __LOG_BUF_LEN);
	return;

err_free_descs:
	memblock_free(__pa(new_descs), new_descs_size);
err_free_log_buf:
	memblock_free(__pa(new_log_buf), new_log_buf_len);
}

static bool __read_mostly ignore_loglevel;

static int __init ignore_loglevel_setup(char *str)
{
	ignore_loglevel = true;
	pr_info("debug: ignoring loglevel setting.\n");

	return 0;
}

early_param("ignore_loglevel", ignore_loglevel_setup);
module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(ignore_loglevel,
		 "ignore loglevel setting (prints all kernel messages to the console)");

static bool suppress_message_printing(int level)
{
	return (level >= console_loglevel && !ignore_loglevel);
}

#ifdef CONFIG_BOOT_PRINTK_DELAY

static int boot_delay; /* msecs delay after each printk during bootup */
static unsigned long long loops_per_msec;	/* based on boot_delay */

static int __init boot_delay_setup(char *str)
{
	unsigned long lpj;

	lpj = preset_lpj ? preset_lpj : 1000000;	/* some guess */
	loops_per_msec = (unsigned long long)lpj / 1000 * HZ;

	get_option(&str, &boot_delay);
	if (boot_delay > 10 * 1000)
		boot_delay = 0;

	pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
		 "HZ: %d, loops_per_msec: %llu\n",
		 boot_delay, preset_lpj, lpj, HZ, loops_per_msec);
	return 0;
}
early_param("boot_delay", boot_delay_setup);
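
/*
 * Example: "boot_delay=100" delays each printk() during boot by roughly
 * 100ms. Values above 10000 (10 seconds) are treated as mistakes and,
 * as seen above, reset the delay to 0.
 */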

static void boot_delay_msec(int level)
{
	unsigned long long k;
	unsigned long timeout;

	if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
		|| suppress_message_printing(level)) {
		return;
	}

	k = (unsigned long long)loops_per_msec * boot_delay;

	timeout = jiffies + msecs_to_jiffies(boot_delay);
	while (k) {
		k--;
		cpu_relax();
		/*
		 * use (volatile) jiffies to prevent
		 * compiler reduction; loop termination via jiffies
		 * is secondary and may or may not happen.
		 */
		if (time_after(jiffies, timeout))
			break;
		touch_nmi_watchdog();
	}
}
#else
static inline void boot_delay_msec(int level)
{
}
#endif

static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);

static size_t print_syslog(unsigned int level, char *buf)
{
	return sprintf(buf, "<%u>", level);
}

static size_t print_time(u64 ts, char *buf)
{
	unsigned long rem_nsec = do_div(ts, 1000000000);

	return sprintf(buf, "[%5lu.%06lu]",
		       (unsigned long)ts, rem_nsec / 1000);
}
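
/*
 * For illustration, print_time() renders a 36863ns timestamp as
 * "[    0.000036]": whole seconds, then the remainder in microseconds.
 */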

#ifdef CONFIG_PRINTK_CALLER
static size_t print_caller(u32 id, char *buf)
{
	char caller[12];

	snprintf(caller, sizeof(caller), "%c%u",
		 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
	return sprintf(buf, "[%6s]", caller);
}
#else
#define print_caller(id, buf) 0
#endif

static size_t info_print_prefix(const struct printk_info *info, bool syslog,
				bool time, char *buf)
{
	size_t len = 0;

	if (syslog)
		len = print_syslog((info->facility << 3) | info->level, buf);

	if (time)
		len += print_time(info->ts_nsec, buf + len);

	len += print_caller(info->caller_id, buf + len);

	if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) {
		buf[len++] = ' ';
		buf[len] = '\0';
	}

	return len;
}

/*
 * Prepare the record for printing. The text is shifted within the given
 * buffer to avoid a need for another one. The following operations are
 * done:
 *
 *   - Add prefix for each line.
 *   - Drop truncated lines that no longer fit into the buffer.
 *   - Add the trailing newline that has been removed in vprintk_store().
 *   - Add a string terminator.
 *
 * Since the produced string is always terminated, the maximum possible
 * return value is @r->text_buf_size - 1.
 *
 * Return: The length of the updated/prepared text, including the added
 * prefixes and the newline. The terminator is not counted. The dropped
 * line(s) are not counted.
 */
static size_t record_print_text(struct printk_record *r, bool syslog,
				bool time)
{
	size_t text_len = r->info->text_len;
	size_t buf_size = r->text_buf_size;
	char *text = r->text_buf;
	char prefix[PREFIX_MAX];
	bool truncated = false;
	size_t prefix_len;
	size_t line_len;
	size_t len = 0;
	char *next;

	/*
	 * If the message was truncated because the buffer was not large
	 * enough, treat the available text as if it were the full text.
	 */
	if (text_len > buf_size)
		text_len = buf_size;

	prefix_len = info_print_prefix(r->info, syslog, time, prefix);

	/*
	 * @text_len: bytes of unprocessed text
	 * @line_len: bytes of current line _without_ newline
	 * @text:     pointer to beginning of current line
	 * @len:      number of bytes prepared in r->text_buf
	 */
	for (;;) {
		next = memchr(text, '\n', text_len);
		if (next) {
			line_len = next - text;
		} else {
			/* Drop truncated line(s). */
			if (truncated)
				break;
			line_len = text_len;
		}

		/*
		 * Truncate the text if there is not enough space to add the
		 * prefix and a trailing newline and a terminator.
		 */
		if (len + prefix_len + text_len + 1 + 1 > buf_size) {
			/* Drop even the current line if no space. */
			if (len + prefix_len + line_len + 1 + 1 > buf_size)
				break;

			text_len = buf_size - len - prefix_len - 1 - 1;
			truncated = true;
		}

		memmove(text + prefix_len, text, text_len);
		memcpy(text, prefix, prefix_len);

		/*
		 * Increment the prepared length to include the text and
		 * prefix that were just moved+copied. Also increment for the
		 * newline at the end of this line. If this is the last line,
		 * there is no newline, but it will be added immediately below.
		 */
		len += prefix_len + line_len + 1;
		if (text_len == line_len) {
			/*
			 * This is the last line. Add the trailing newline
			 * removed in vprintk_store().
			 */
			text[prefix_len + line_len] = '\n';
			break;
		}

		/*
		 * Advance beyond the added prefix and the related line with
		 * its newline.
		 */
		text += prefix_len + line_len + 1;

		/*
		 * The remaining text has only decreased by the line with its
		 * newline.
		 *
		 * Note that @text_len can become zero. It happens when @text
		 * ended with a newline (either due to truncation or the
		 * original string ending with "\n\n"). The loop is correctly
		 * repeated and (if not truncated) an empty line with a prefix
		 * will be prepared.
		 */
		text_len -= line_len + 1;
	}

	/*
	 * If a buffer was provided, it will be terminated. Space for the
	 * string terminator is guaranteed to be available. The terminator is
	 * not counted in the return value.
	 */
	if (buf_size > 0)
		r->text_buf[len] = 0;

	return len;
}
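
/*
 * For illustration, a record holding "line 1\nline 2" (text_len == 13)
 * with a "[    5.000000] " time prefix is rewritten in place as:
 *
 *	[    5.000000] line 1\n[    5.000000] line 2\n
 *
 * The returned length counts both prefixes and both newlines, but not
 * the terminating '\0'.
 */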

static size_t get_record_print_text_size(struct printk_info *info,
					 unsigned int line_count,
					 bool syslog, bool time)
{
	char prefix[PREFIX_MAX];
	size_t prefix_len;

	prefix_len = info_print_prefix(info, syslog, time, prefix);

	/*
	 * Each line will be preceded with a prefix. The intermediate
	 * newlines are already within the text, but a final trailing
	 * newline will be added.
	 */
	return ((prefix_len * line_count) + info->text_len + 1);
}

/*
 * Beginning with @start_seq, find the first record where it and all following
 * records up to (but not including) @max_seq fit into @size.
 *
 * @max_seq is simply an upper bound and does not need to exist. If the caller
 * does not require an upper bound, -1 can be used for @max_seq.
 */
static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
				  bool syslog, bool time)
{
	struct printk_info info;
	unsigned int line_count;
	size_t len = 0;
	u64 seq;

	/* Determine the size of the records up to @max_seq. */
	prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
		if (info.seq >= max_seq)
			break;
		len += get_record_print_text_size(&info, line_count, syslog, time);
	}

	/*
	 * Adjust the upper bound for the next loop to avoid subtracting
	 * lengths that were never added.
	 */
	if (seq < max_seq)
		max_seq = seq;

	/*
	 * Move first record forward until length fits into the buffer. Ignore
	 * newest messages that were not counted in the above cycle. Messages
	 * might appear and get lost in the meantime. This is a best effort
	 * that prevents an infinite loop that could occur with a retry.
	 */
	prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
		if (len <= size || info.seq >= max_seq)
			break;
		len -= get_record_print_text_size(&info, line_count, syslog, time);
	}

	return seq;
}

static int syslog_print(char __user *buf, int size)
{
	struct printk_info info;
	struct printk_record r;
	char *text;
	int len = 0;

	text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
	if (!text)
		return -ENOMEM;

	prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);

	while (size > 0) {
		size_t n;
		size_t skip;

		printk_safe_enter_irq();
		raw_spin_lock(&syslog_lock);
		if (!prb_read_valid(prb, syslog_seq, &r)) {
			raw_spin_unlock(&syslog_lock);
			printk_safe_exit_irq();
			break;
		}
		if (r.info->seq != syslog_seq) {
			/* message is gone, move to next valid one */
			syslog_seq = r.info->seq;
			syslog_partial = 0;
		}

		/*
		 * To keep reading/counting partial line consistent,
		 * use printk_time value as of the beginning of a line.
		 */
		if (!syslog_partial)
			syslog_time = printk_time;

		skip = syslog_partial;
		n = record_print_text(&r, true, syslog_time);
		if (n - syslog_partial <= size) {
			/* message fits into buffer, move forward */
			syslog_seq = r.info->seq + 1;
			n -= syslog_partial;
			syslog_partial = 0;
		} else if (!len) {
			/* partial read(), remember position */
			n = size;
			syslog_partial += n;
		} else
			n = 0;
		raw_spin_unlock(&syslog_lock);
		printk_safe_exit_irq();

		if (!n)
			break;

		if (copy_to_user(buf, text + skip, n)) {
			if (!len)
				len = -EFAULT;
			break;
		}

		len += n;
		size -= n;
		buf += n;
	}

	kfree(text);
	return len;
}

static int syslog_print_all(char __user *buf, int size, bool clear)
{
	struct printk_info info;
	struct printk_record r;
	char *text;
	int len = 0;
	u64 seq;
	bool time;

	text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
	if (!text)
		return -ENOMEM;

	time = printk_time;
	printk_safe_enter_irq();
	/*
	 * Find first record that fits, including all following records,
	 * into the user-provided buffer for this dump.
	 */
	seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
				     size, true, time);

	prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);

	len = 0;
	prb_for_each_record(seq, prb, seq, &r) {
		int textlen;

		textlen = record_print_text(&r, true, time);

		if (len + textlen > size) {
			seq--;
			break;
		}

		printk_safe_exit_irq();
		if (copy_to_user(buf + len, text, textlen))
			len = -EFAULT;
		else
			len += textlen;
		printk_safe_enter_irq();

		if (len < 0)
			break;
	}

	if (clear) {
		raw_spin_lock(&syslog_lock);
		latched_seq_write(&clear_seq, seq);
		raw_spin_unlock(&syslog_lock);
	}
	printk_safe_exit_irq();

	kfree(text);
	return len;
}

static void syslog_clear(void)
{
	printk_safe_enter_irq();
	raw_spin_lock(&syslog_lock);
	latched_seq_write(&clear_seq, prb_next_seq(prb));
	raw_spin_unlock(&syslog_lock);
	printk_safe_exit_irq();
}

/* Return a consistent copy of @syslog_seq. */
static u64 read_syslog_seq_irq(void)
{
	u64 seq;

	raw_spin_lock_irq(&syslog_lock);
	seq = syslog_seq;
	raw_spin_unlock_irq(&syslog_lock);

	return seq;
}

int do_syslog(int type, char __user *buf, int len, int source)
{
	struct printk_info info;
	bool clear = false;
	static int saved_console_loglevel = LOGLEVEL_DEFAULT;
	int error;

	error = check_syslog_permissions(type, source);
	if (error)
		return error;

	switch (type) {
	case SYSLOG_ACTION_CLOSE:	/* Close log */
		break;
	case SYSLOG_ACTION_OPEN:	/* Open log */
		break;
	case SYSLOG_ACTION_READ:	/* Read from log */
		if (!buf || len < 0)
			return -EINVAL;
		if (!len)
			return 0;
		if (!access_ok(buf, len))
			return -EFAULT;

		error = wait_event_interruptible(log_wait,
				prb_read_valid(prb, read_syslog_seq_irq(), NULL));
		if (error)
			return error;
		error = syslog_print(buf, len);
		break;
	/* Read/clear last kernel messages */
	case SYSLOG_ACTION_READ_CLEAR:
		clear = true;
		fallthrough;
	/* Read last kernel messages */
	case SYSLOG_ACTION_READ_ALL:
		if (!buf || len < 0)
			return -EINVAL;
		if (!len)
			return 0;
		if (!access_ok(buf, len))
			return -EFAULT;
		error = syslog_print_all(buf, len, clear);
		break;
	/* Clear ring buffer */
	case SYSLOG_ACTION_CLEAR:
		syslog_clear();
		break;
	/* Disable logging to console */
	case SYSLOG_ACTION_CONSOLE_OFF:
		if (saved_console_loglevel == LOGLEVEL_DEFAULT)
			saved_console_loglevel = console_loglevel;
		console_loglevel = minimum_console_loglevel;
		break;
	/* Enable logging to console */
	case SYSLOG_ACTION_CONSOLE_ON:
		if (saved_console_loglevel != LOGLEVEL_DEFAULT) {
			console_loglevel = saved_console_loglevel;
			saved_console_loglevel = LOGLEVEL_DEFAULT;
		}
		break;
	/* Set level of messages printed to console */
	case SYSLOG_ACTION_CONSOLE_LEVEL:
		if (len < 1 || len > 8)
			return -EINVAL;
		if (len < minimum_console_loglevel)
			len = minimum_console_loglevel;
		console_loglevel = len;
		/* Implicitly re-enable logging to console */
		saved_console_loglevel = LOGLEVEL_DEFAULT;
		break;
	/* Number of chars in the log buffer */
	case SYSLOG_ACTION_SIZE_UNREAD:
		printk_safe_enter_irq();
		raw_spin_lock(&syslog_lock);
		if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
			/* No unread messages. */
			raw_spin_unlock(&syslog_lock);
			printk_safe_exit_irq();
			return 0;
		}
		if (info.seq != syslog_seq) {
			/* messages are gone, move to first one */
			syslog_seq = info.seq;
			syslog_partial = 0;
		}
		if (source == SYSLOG_FROM_PROC) {
			/*
			 * Short-cut for poll() on /proc/kmsg, which simply
			 * checks for pending data, not the size; return the
			 * count of records, not the length.
			 */
			error = prb_next_seq(prb) - syslog_seq;
		} else {
			bool time = syslog_partial ? syslog_time : printk_time;
			unsigned int line_count;
			u64 seq;

			prb_for_each_info(syslog_seq, prb, seq, &info,
					  &line_count) {
				error += get_record_print_text_size(&info, line_count,
								    true, time);
				time = printk_time;
			}
			error -= syslog_partial;
		}
		raw_spin_unlock(&syslog_lock);
		printk_safe_exit_irq();
		break;
	/* Size of the log buffer */
	case SYSLOG_ACTION_SIZE_BUFFER:
		error = log_buf_len;
		break;
	default:
		error = -EINVAL;
		break;
	}

	return error;
}

SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
{
	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
}
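
/*
 * From userspace this syscall is typically reached through glibc's
 * klogctl(); a sketch of two common actions (numeric values as defined
 * in include/linux/syslog.h):
 *
 *	int size = klogctl(10, NULL, 0);   // SYSLOG_ACTION_SIZE_BUFFER
 *	char *buf = malloc(size);
 *	int len  = klogctl(3, buf, size);  // SYSLOG_ACTION_READ_ALL
 */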

/*
 * Special console_lock variants that help to reduce the risk of soft-lockups.
 * They allow the console_lock to be handed over to another printk() call
 * using a busy wait.
 */

#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_owner_dep_map = {
	.name = "console_owner"
};
#endif

static DEFINE_RAW_SPINLOCK(console_owner_lock);
static struct task_struct *console_owner;
static bool console_waiter;

/**
 * console_lock_spinning_enable - mark beginning of code where another
 *	thread might safely busy wait
 *
 * This basically converts console_lock into a spinlock. This marks
 * the section where the console_lock owner can not sleep, because
 * there may be a waiter spinning (like a spinlock). Also it must be
 * ready to hand over the lock at the end of the section.
 */
static void console_lock_spinning_enable(void)
{
	raw_spin_lock(&console_owner_lock);
	console_owner = current;
	raw_spin_unlock(&console_owner_lock);

	/* The waiter may spin on us after setting console_owner */
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
}

/**
 * console_lock_spinning_disable_and_check - mark end of code where another
 *	thread was able to busy wait and check if there is a waiter
 *
 * This is called at the end of the section where spinning is allowed.
 * It has two functions. First, it is a signal that it is no longer
 * safe to start busy waiting for the lock. Second, it checks if
 * there is a busy waiter and passes the lock rights to that waiter.
 *
 * Important: Callers lose the lock if there was a busy waiter.
 *	They must not touch items synchronized by console_lock
 *	in this case.
 *
 * Return: 1 if the lock rights were passed, 0 otherwise.
 */
static int console_lock_spinning_disable_and_check(void)
{
	int waiter;

	raw_spin_lock(&console_owner_lock);
	waiter = READ_ONCE(console_waiter);
	console_owner = NULL;
	raw_spin_unlock(&console_owner_lock);

	if (!waiter) {
		spin_release(&console_owner_dep_map, _THIS_IP_);
		return 0;
	}

	/* The waiter is now free to continue */
	WRITE_ONCE(console_waiter, false);

	spin_release(&console_owner_dep_map, _THIS_IP_);

	/*
	 * Hand off console_lock to waiter. The waiter will perform
	 * the up(). After this, the waiter is the console_lock owner.
	 */
	mutex_release(&console_lock_dep_map, _THIS_IP_);
	return 1;
}

/**
 * console_trylock_spinning - try to get console_lock by busy waiting
 *
 * This allows busy waiting for the console_lock when the current
 * owner is running in specially marked sections. It means that
 * the current owner is running and cannot reschedule until it
 * is ready to lose the lock.
 *
 * Return: 1 if we got the lock, 0 otherwise
 */
static int console_trylock_spinning(void)
{
	struct task_struct *owner = NULL;
	bool waiter;
	bool spin = false;
	unsigned long flags;

	if (console_trylock())
		return 1;

	printk_safe_enter_irqsave(flags);

	raw_spin_lock(&console_owner_lock);
	owner = READ_ONCE(console_owner);
	waiter = READ_ONCE(console_waiter);
	if (!waiter && owner && owner != current) {
		WRITE_ONCE(console_waiter, true);
		spin = true;
	}
	raw_spin_unlock(&console_owner_lock);

	/*
	 * If there is an active printk() writing to the
	 * consoles, instead of having it write our data too,
	 * see if we can offload that load from the active
	 * printer, and do some printing ourselves.
	 * Go into a spin only if there isn't already a waiter
	 * spinning, and there is an active printer, and
	 * that active printer isn't us (recursive printk?).
	 */
	if (!spin) {
		printk_safe_exit_irqrestore(flags);
		return 0;
	}

	/* We spin waiting for the owner to release us */
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
	/* Owner will clear console_waiter on hand off */
	while (READ_ONCE(console_waiter))
		cpu_relax();
	spin_release(&console_owner_dep_map, _THIS_IP_);

	printk_safe_exit_irqrestore(flags);
	/*
	 * The owner passed the console lock to us.
	 * Since we did not spin on console lock, annotate
	 * this as a trylock. Otherwise lockdep will
	 * complain.
	 */
	mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);

	return 1;
}

/*
 * Call the console drivers, asking them to write out
 * log_buf[start] to log_buf[end - 1].
 * The console_lock must be held.
 */
static void call_console_drivers(const char *ext_text, size_t ext_len,
				 const char *text, size_t len)
{
	static char dropped_text[64];
	size_t dropped_len = 0;
	struct console *con;

	trace_console_rcuidle(text, len);

	if (!console_drivers)
		return;

	if (console_dropped) {
		dropped_len = snprintf(dropped_text, sizeof(dropped_text),
				       "** %lu printk messages dropped **\n",
				       console_dropped);
		console_dropped = 0;
	}

	for_each_console(con) {
		if (exclusive_console && con != exclusive_console)
			continue;
		if (!(con->flags & CON_ENABLED))
			continue;
		if (!con->write)
			continue;
		if (!cpu_online(smp_processor_id()) &&
		    !(con->flags & CON_ANYTIME))
			continue;
		if (con->flags & CON_EXTENDED)
			con->write(con, ext_text, ext_len);
		else {
			if (dropped_len)
				con->write(con, dropped_text, dropped_len);
			con->write(con, text, len);
		}
	}
}

int printk_delay_msec __read_mostly;

static inline void printk_delay(void)
{
	if (unlikely(printk_delay_msec)) {
		int m = printk_delay_msec;

		while (m--) {
			mdelay(1);
			touch_nmi_watchdog();
		}
	}
}

static inline u32 printk_caller_id(void)
{
	return in_task() ? task_pid_nr(current) :
		0x80000000 + raw_smp_processor_id();
}

/**
 * parse_prefix - Parse level and control flags.
 *
 * @text:   The terminated text message.
 * @level:  A pointer to the current level value, will be updated.
 * @lflags: A pointer to the current log flags, will be updated.
 *
 * @level may be NULL if the caller is not interested in the parsed value.
 * Otherwise the variable pointed to by @level must be set to
 * LOGLEVEL_DEFAULT in order to be updated with the parsed value.
 *
 * @lflags may be NULL if the caller is not interested in the parsed value.
 * Otherwise the variable pointed to by @lflags will be OR'd with the parsed
 * value.
 *
 * Return: The length of the parsed level and control flags.
 */
static u16 parse_prefix(char *text, int *level, enum log_flags *lflags)
{
	u16 prefix_len = 0;
	int kern_level;

	while (*text) {
		kern_level = printk_get_level(text);
		if (!kern_level)
			break;

		switch (kern_level) {
		case '0' ... '7':
			if (level && *level == LOGLEVEL_DEFAULT)
				*level = kern_level - '0';
			break;
		case 'c':	/* KERN_CONT */
			if (lflags)
				*lflags |= LOG_CONT;
		}

		prefix_len += 2;
		text += 2;
	}

	return prefix_len;
}
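
/*
 * Example: printk(KERN_ERR "oops\n") arrives here with text starting
 * "\0013" (KERN_SOH followed by the level character). parse_prefix()
 * consumes those two bytes, sets a level of 3 and returns 2. KERN_CONT
 * ("\001c") sets LOG_CONT instead of a level.
 */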
1967 * @level: A pointer to the current level value, will be updated. 1968 * @lflags: A pointer to the current log flags, will be updated. 1969 * 1970 * @level may be NULL if the caller is not interested in the parsed value. 1971 * Otherwise the variable pointed to by @level must be set to 1972 * LOGLEVEL_DEFAULT in order to be updated with the parsed value. 1973 * 1974 * @lflags may be NULL if the caller is not interested in the parsed value. 1975 * Otherwise the variable pointed to by @lflags will be OR'd with the parsed 1976 * value. 1977 * 1978 * Return: The length of the parsed level and control flags. 1979 */ 1980 static u16 parse_prefix(char *text, int *level, enum log_flags *lflags) 1981 { 1982 u16 prefix_len = 0; 1983 int kern_level; 1984 1985 while (*text) { 1986 kern_level = printk_get_level(text); 1987 if (!kern_level) 1988 break; 1989 1990 switch (kern_level) { 1991 case '0' ... '7': 1992 if (level && *level == LOGLEVEL_DEFAULT) 1993 *level = kern_level - '0'; 1994 break; 1995 case 'c': /* KERN_CONT */ 1996 if (lflags) 1997 *lflags |= LOG_CONT; 1998 } 1999 2000 prefix_len += 2; 2001 text += 2; 2002 } 2003 2004 return prefix_len; 2005 } 2006 2007 static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lflags, 2008 const char *fmt, va_list args) 2009 { 2010 u16 text_len; 2011 2012 text_len = vscnprintf(text, size, fmt, args); 2013 2014 /* Mark and strip a trailing newline. */ 2015 if (text_len && text[text_len - 1] == '\n') { 2016 text_len--; 2017 *lflags |= LOG_NEWLINE; 2018 } 2019 2020 /* Strip log level and control flags. */ 2021 if (facility == 0) { 2022 u16 prefix_len; 2023 2024 prefix_len = parse_prefix(text, NULL, NULL); 2025 if (prefix_len) { 2026 text_len -= prefix_len; 2027 memmove(text, text + prefix_len, text_len); 2028 } 2029 } 2030 2031 return text_len; 2032 } 2033 2034 __printf(4, 0) 2035 int vprintk_store(int facility, int level, 2036 const struct dev_printk_info *dev_info, 2037 const char *fmt, va_list args) 2038 { 2039 const u32 caller_id = printk_caller_id(); 2040 struct prb_reserved_entry e; 2041 enum log_flags lflags = 0; 2042 struct printk_record r; 2043 u16 trunc_msg_len = 0; 2044 char prefix_buf[8]; 2045 u16 reserve_size; 2046 va_list args2; 2047 u16 text_len; 2048 u64 ts_nsec; 2049 2050 /* 2051 * Since the duration of printk() can vary depending on the message 2052 * and state of the ringbuffer, grab the timestamp now so that it is 2053 * close to the call of printk(). This provides a more deterministic 2054 * timestamp with respect to the caller. 2055 */ 2056 ts_nsec = local_clock(); 2057 2058 /* 2059 * The sprintf needs to come first since the syslog prefix might be 2060 * passed in as a parameter. An extra byte must be reserved so that 2061 * later the vscnprintf() into the reserved buffer has room for the 2062 * terminating '\0', which is not counted by vsnprintf(). 2063 */ 2064 va_copy(args2, args); 2065 reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1; 2066 va_end(args2); 2067 2068 if (reserve_size > LOG_LINE_MAX) 2069 reserve_size = LOG_LINE_MAX; 2070 2071 /* Extract log level or control flags. 

static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lflags,
			 const char *fmt, va_list args)
{
	u16 text_len;

	text_len = vscnprintf(text, size, fmt, args);

	/* Mark and strip a trailing newline. */
	if (text_len && text[text_len - 1] == '\n') {
		text_len--;
		*lflags |= LOG_NEWLINE;
	}

	/* Strip log level and control flags. */
	if (facility == 0) {
		u16 prefix_len;

		prefix_len = parse_prefix(text, NULL, NULL);
		if (prefix_len) {
			text_len -= prefix_len;
			memmove(text, text + prefix_len, text_len);
		}
	}

	return text_len;
}

__printf(4, 0)
int vprintk_store(int facility, int level,
		  const struct dev_printk_info *dev_info,
		  const char *fmt, va_list args)
{
	const u32 caller_id = printk_caller_id();
	struct prb_reserved_entry e;
	enum log_flags lflags = 0;
	struct printk_record r;
	u16 trunc_msg_len = 0;
	char prefix_buf[8];
	u16 reserve_size;
	va_list args2;
	u16 text_len;
	u64 ts_nsec;

	/*
	 * Since the duration of printk() can vary depending on the message
	 * and state of the ringbuffer, grab the timestamp now so that it is
	 * close to the call of printk(). This provides a more deterministic
	 * timestamp with respect to the caller.
	 */
	ts_nsec = local_clock();

	/*
	 * The sprintf needs to come first since the syslog prefix might be
	 * passed in as a parameter. An extra byte must be reserved so that
	 * later the vscnprintf() into the reserved buffer has room for the
	 * terminating '\0', which is not counted by vsnprintf().
	 */
	va_copy(args2, args);
	reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1;
	va_end(args2);

	if (reserve_size > LOG_LINE_MAX)
		reserve_size = LOG_LINE_MAX;

	/* Extract log level or control flags. */
	if (facility == 0)
		parse_prefix(&prefix_buf[0], &level, &lflags);

	if (level == LOGLEVEL_DEFAULT)
		level = default_message_loglevel;

	if (dev_info)
		lflags |= LOG_NEWLINE;

	if (lflags & LOG_CONT) {
		prb_rec_init_wr(&r, reserve_size);
		if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
			text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
						 facility, &lflags, fmt, args);
			r.info->text_len += text_len;

			if (lflags & LOG_NEWLINE) {
				r.info->flags |= LOG_NEWLINE;
				prb_final_commit(&e);
			} else {
				prb_commit(&e);
			}

			return text_len;
		}
	}

	/*
	 * Explicitly initialize the record before every prb_reserve() call.
	 * prb_reserve_in_last() and prb_reserve() purposely invalidate the
	 * structure when they fail.
	 */
	prb_rec_init_wr(&r, reserve_size);
	if (!prb_reserve(&e, prb, &r)) {
		/* truncate the message if it is too long for empty buffer */
		truncate_msg(&reserve_size, &trunc_msg_len);

		prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
		if (!prb_reserve(&e, prb, &r))
			return 0;
	}

	/* fill message */
	text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args);
	if (trunc_msg_len)
		memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len);
	r.info->text_len = text_len + trunc_msg_len;
	r.info->facility = facility;
	r.info->level = level & 7;
	r.info->flags = lflags & 0x1f;
	r.info->ts_nsec = ts_nsec;
	r.info->caller_id = caller_id;
	if (dev_info)
		memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));

	/* A message without a trailing newline can be continued. */
	if (!(lflags & LOG_NEWLINE))
		prb_commit(&e);
	else
		prb_final_commit(&e);

	return (text_len + trunc_msg_len);
}

asmlinkage int vprintk_emit(int facility, int level,
			    const struct dev_printk_info *dev_info,
			    const char *fmt, va_list args)
{
	int printed_len;
	bool in_sched = false;
	unsigned long flags;

	/* Suppress unimportant messages after panic happens */
	if (unlikely(suppress_printk))
		return 0;

	if (level == LOGLEVEL_SCHED) {
		level = LOGLEVEL_DEFAULT;
		in_sched = true;
	}

	boot_delay_msec(level);
	printk_delay();

	printk_safe_enter_irqsave(flags);
	printed_len = vprintk_store(facility, level, dev_info, fmt, args);
	printk_safe_exit_irqrestore(flags);

	/* If called from the scheduler, we cannot call up(). */
	if (!in_sched) {
		/*
		 * Disable preemption to avoid being preempted while holding
		 * console_sem which would prevent anyone from printing to
		 * console
		 */
		preempt_disable();
		/*
		 * Try to acquire and then immediately release the console
		 * semaphore. The release will print out buffers and wake up
		 * /dev/kmsg and syslog() users.
		 */
		if (console_trylock_spinning())
			console_unlock();
		preempt_enable();
	}

	wake_up_klogd();
	return printed_len;
}
EXPORT_SYMBOL(vprintk_emit);

int vprintk_default(const char *fmt, va_list args)
{
	return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
}
EXPORT_SYMBOL_GPL(vprintk_default);

/**
 * printk - print a kernel message
 * @fmt: format string
 *
 * This is printk(). It can be called from any context. We want it to work.
 *
 * We try to grab the console_lock. If we succeed, it's easy - we log the
 * output and call the console drivers. If we fail to get the semaphore, we
 * place the output into the log buffer and return. The current holder of
 * the console_sem will notice the new output in console_unlock() and will
 * send it to the consoles before releasing the lock.
 *
 * One effect of this deferred printing is that code which calls printk() and
 * then changes console_loglevel may break. This is because console_loglevel
 * is inspected when the actual printing occurs.
 *
 * See also:
 * printf(3)
 *
 * See the vsnprintf() documentation for format string extensions over C99.
 */
asmlinkage __visible int printk(const char *fmt, ...)
{
	va_list args;
	int r;

	va_start(args, fmt);
	r = vprintk(fmt, args);
	va_end(args);

	return r;
}
EXPORT_SYMBOL(printk);
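
/*
 * Example usage (sketch; "mydev" and "err" are made up): callers pass
 * a KERN_<LEVEL> prefix explicitly or, more commonly, use the pr_*()
 * helpers that prepend it (plus the pr_fmt() prefix) for them:
 *
 *	printk(KERN_WARNING "mydev: reset failed (err %d)\n", err);
 *	pr_warn("reset failed (err %d)\n", err);
 *
 * Both normally reach vprintk_emit() via vprintk_default().
 */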

#else /* CONFIG_PRINTK */

#define CONSOLE_LOG_MAX		0
#define printk_time		false

#define prb_read_valid(rb, seq, r)	false
#define prb_first_valid_seq(rb)		0

static u64 syslog_seq;
static u64 console_seq;
static u64 exclusive_console_stop_seq;
static unsigned long console_dropped;

static size_t record_print_text(const struct printk_record *r,
				bool syslog, bool time)
{
	return 0;
}
static ssize_t info_print_ext_header(char *buf, size_t size,
				     struct printk_info *info)
{
	return 0;
}
static ssize_t msg_print_ext_body(char *buf, size_t size,
				  char *text, size_t text_len,
				  struct dev_printk_info *dev_info) { return 0; }
static void console_lock_spinning_enable(void) { }
static int console_lock_spinning_disable_and_check(void) { return 0; }
static void call_console_drivers(const char *ext_text, size_t ext_len,
				 const char *text, size_t len) {}
static bool suppress_message_printing(int level) { return false; }

#endif /* CONFIG_PRINTK */

#ifdef CONFIG_EARLY_PRINTK
struct console *early_console;

asmlinkage __visible void early_printk(const char *fmt, ...)
{
	va_list ap;
	char buf[512];
	int n;

	if (!early_console)
		return;

	va_start(ap, fmt);
	n = vscnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	early_console->write(early_console, buf, n);
}
#endif

static int __add_preferred_console(char *name, int idx, char *options,
				   char *brl_options, bool user_specified)
{
	struct console_cmdline *c;
	int i;

	/*
	 * See if this tty is not yet registered, and
	 * if we have a slot free.
	 */
	for (i = 0, c = console_cmdline;
	     i < MAX_CMDLINECONSOLES && c->name[0];
	     i++, c++) {
		if (strcmp(c->name, name) == 0 && c->index == idx) {
			if (!brl_options)
				preferred_console = i;
			if (user_specified)
				c->user_specified = true;
			return 0;
		}
	}
	if (i == MAX_CMDLINECONSOLES)
		return -E2BIG;
	if (!brl_options)
		preferred_console = i;
	strlcpy(c->name, name, sizeof(c->name));
	c->options = options;
	c->user_specified = user_specified;
	braille_set_options(c, brl_options);

	c->index = idx;
	return 0;
}

static int __init console_msg_format_setup(char *str)
{
	if (!strcmp(str, "syslog"))
		console_msg_format = MSG_FORMAT_SYSLOG;
	if (!strcmp(str, "default"))
		console_msg_format = MSG_FORMAT_DEFAULT;
	return 1;
}
__setup("console_msg_format=", console_msg_format_setup);

/*
 * Set up a console. Called via do_early_param() in init/main.c
 * for each "console=" parameter in the boot command line.
 */
static int __init console_setup(char *str)
{
	char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
	char *s, *options, *brl_options = NULL;
	int idx;

	/*
	 * console="" or console=null have been suggested as a way to
	 * disable console output. Use ttynull that has been created
	 * for exactly this purpose.
	 */
	if (str[0] == 0 || strcmp(str, "null") == 0) {
		__add_preferred_console("ttynull", 0, NULL, NULL, true);
		return 1;
	}

	if (_braille_console_setup(&str, &brl_options))
		return 1;

	/*
	 * Decode str into name, index, options.
	 */
	if (str[0] >= '0' && str[0] <= '9') {
		strcpy(buf, "ttyS");
		strncpy(buf + 4, str, sizeof(buf) - 5);
	} else {
		strncpy(buf, str, sizeof(buf) - 1);
	}
	buf[sizeof(buf) - 1] = 0;
	options = strchr(str, ',');
	if (options)
		*(options++) = 0;
#ifdef __sparc__
	if (!strcmp(str, "ttya"))
		strcpy(buf, "ttyS0");
	if (!strcmp(str, "ttyb"))
		strcpy(buf, "ttyS1");
#endif
	for (s = buf; *s; s++)
		if (isdigit(*s) || *s == ',')
			break;
	idx = simple_strtoul(s, NULL, 10);
	*s = 0;

	__add_preferred_console(buf, idx, options, brl_options, true);
	console_set_on_cmdline = 1;
	return 1;
}
__setup("console=", console_setup);
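
/*
 * Worked example (illustrative): "console=ttyS0,115200n8" is decoded
 * by console_setup() into name "ttyS", index 0 and options "115200n8",
 * i.e. it results in the equivalent of:
 *
 *	__add_preferred_console("ttyS", 0, "115200n8", NULL, true);
 *
 * A bare "console=5" is shorthand for "ttyS5".
 */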

/**
 * add_preferred_console - add a device to the list of preferred consoles.
 * @name: device name
 * @idx: device index
 * @options: options for this console
 *
 * The last preferred console added will be used for kernel messages
 * and stdin/out/err for init. Normally this is used by console_setup
 * above to handle user-supplied console arguments; however it can also
 * be used by arch-specific code either to override the user or more
 * commonly to provide a default console (i.e. from PROM variables) when
 * the user has not supplied one.
 */
int add_preferred_console(char *name, int idx, char *options)
{
	return __add_preferred_console(name, idx, options, NULL, false);
}
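
/*
 * Example (sketch, values made up): architecture setup code can
 * establish a firmware-derived fallback console:
 *
 *	add_preferred_console("ttyS", 0, "9600n8");
 *
 * A "console=" option on the command line still takes precedence,
 * because user-specified entries are matched first in
 * try_enable_new_console() below.
 */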

bool console_suspend_enabled = true;
EXPORT_SYMBOL(console_suspend_enabled);

static int __init console_suspend_disable(char *str)
{
	console_suspend_enabled = false;
	return 1;
}
__setup("no_console_suspend", console_suspend_disable);
module_param_named(console_suspend, console_suspend_enabled,
		bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(console_suspend, "suspend console during suspend"
	" and hibernate operations");

/**
 * suspend_console - suspend the console subsystem
 *
 * This disables printk() while we go into suspend states
 */
void suspend_console(void)
{
	if (!console_suspend_enabled)
		return;
	pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
	console_lock();
	console_suspended = 1;
	up_console_sem();
}

void resume_console(void)
{
	if (!console_suspend_enabled)
		return;
	down_console_sem();
	console_suspended = 0;
	console_unlock();
}

/**
 * console_cpu_notify - print deferred console messages after CPU hotplug
 * @cpu: unused
 *
 * If printk() is called from a CPU that is not online yet, the messages
 * will be printed on the console only if there are CON_ANYTIME consoles.
 * This function is called when a new CPU comes online (or fails to come
 * up) or goes offline.
 */
static int console_cpu_notify(unsigned int cpu)
{
	if (!cpuhp_tasks_frozen) {
		/* If trylock fails, someone else is doing the printing */
		if (console_trylock())
			console_unlock();
	}
	return 0;
}

/**
 * console_lock - lock the console system for exclusive use.
 *
 * Acquires a lock which guarantees that the caller has
 * exclusive access to the console system and the console_drivers list.
 *
 * Can sleep, returns nothing.
 */
void console_lock(void)
{
	might_sleep();

	down_console_sem();
	if (console_suspended)
		return;
	console_locked = 1;
	console_may_schedule = 1;
}
EXPORT_SYMBOL(console_lock);

/**
 * console_trylock - try to lock the console system for exclusive use.
 *
 * Try to acquire a lock which guarantees that the caller has exclusive
 * access to the console system and the console_drivers list.
 *
 * Returns 1 on success, and 0 on failure to acquire the lock.
 */
int console_trylock(void)
{
	if (down_trylock_console_sem())
		return 0;
	if (console_suspended) {
		up_console_sem();
		return 0;
	}
	console_locked = 1;
	console_may_schedule = 0;
	return 1;
}
EXPORT_SYMBOL(console_trylock);

int is_console_locked(void)
{
	return console_locked;
}
EXPORT_SYMBOL(is_console_locked);
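
/*
 * Typical usage pattern (sketch): take the lock around any walk of
 * the console_drivers list, exactly as console_device() below does:
 *
 *	struct console *con;
 *
 *	console_lock();
 *	for_each_console(con)
 *		pr_info("registered: %s%d\n", con->name, con->index);
 *	console_unlock();
 */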

/*
 * Check if we have any console that is capable of printing while cpu is
 * booting or shutting down. Requires console_sem.
 */
static int have_callable_console(void)
{
	struct console *con;

	for_each_console(con)
		if ((con->flags & CON_ENABLED) &&
		    (con->flags & CON_ANYTIME))
			return 1;

	return 0;
}

/*
 * Can we actually use the console at this time on this cpu?
 *
 * Console drivers may assume that per-cpu resources have been allocated. So
 * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
 * call them until this CPU is officially up.
 */
static inline int can_use_console(void)
{
	return cpu_online(raw_smp_processor_id()) || have_callable_console();
}

/**
 * console_unlock - unlock the console system
 *
 * Releases the console_lock which the caller holds on the console system
 * and the console driver list.
 *
 * While the console_lock was held, console output may have been buffered
 * by printk(). If this is the case, console_unlock() emits
 * the output prior to releasing the lock.
 *
 * If there is output waiting, we wake /dev/kmsg and syslog() users.
 *
 * console_unlock() may be called from any context.
 */
void console_unlock(void)
{
	static char ext_text[CONSOLE_EXT_LOG_MAX];
	static char text[CONSOLE_LOG_MAX];
	unsigned long flags;
	bool do_cond_resched, retry;
	struct printk_info info;
	struct printk_record r;

	if (console_suspended) {
		up_console_sem();
		return;
	}

	prb_rec_init_rd(&r, &info, text, sizeof(text));

	/*
	 * Console drivers are called with interrupts disabled, so
	 * @console_may_schedule should be cleared before; however, we may
	 * end up dumping a lot of lines, for example, if called from
	 * console registration path, and should invoke cond_resched()
	 * between lines if allowable. Not doing so can cause a very long
	 * scheduling stall on a slow console leading to RCU stall and
	 * softlockup warnings which exacerbate the issue with more
	 * messages practically incapacitating the system.
	 *
	 * console_trylock() is not able to detect the preemptive
	 * context reliably. Therefore the value must be stored before
	 * and cleared after the "again" goto label.
	 */
	do_cond_resched = console_may_schedule;
again:
	console_may_schedule = 0;

	/*
	 * We released the console_sem lock, so we need to recheck if
	 * cpu is online and (if not) is there at least one CON_ANYTIME
	 * console.
	 */
	if (!can_use_console()) {
		console_locked = 0;
		up_console_sem();
		return;
	}

	for (;;) {
		size_t ext_len = 0;
		size_t len;

		printk_safe_enter_irqsave(flags);
skip:
		if (!prb_read_valid(prb, console_seq, &r))
			break;

		if (console_seq != r.info->seq) {
			console_dropped += r.info->seq - console_seq;
			console_seq = r.info->seq;
		}

		if (suppress_message_printing(r.info->level)) {
			/*
			 * Skip record we have buffered and already printed
			 * directly to the console when we received it, and
			 * record that has level above the console loglevel.
			 */
			console_seq++;
			goto skip;
		}

		/* Output to all consoles once old messages replayed. */
		if (unlikely(exclusive_console &&
			     console_seq >= exclusive_console_stop_seq)) {
			exclusive_console = NULL;
		}

		/*
		 * Handle extended console text first because later
		 * record_print_text() will modify the record buffer in-place.
		 */
		if (nr_ext_console_drivers) {
			ext_len = info_print_ext_header(ext_text,
							sizeof(ext_text),
							r.info);
			ext_len += msg_print_ext_body(ext_text + ext_len,
						      sizeof(ext_text) - ext_len,
						      &r.text_buf[0],
						      r.info->text_len,
						      &r.info->dev_info);
		}
		len = record_print_text(&r,
					console_msg_format & MSG_FORMAT_SYSLOG,
					printk_time);
		console_seq++;

		/*
		 * While actively printing out messages, if another printk()
		 * were to occur on another CPU, it may wait for this one to
		 * finish. This task cannot be preempted if there is a
		 * waiter waiting to take over.
		 */
		console_lock_spinning_enable();

		stop_critical_timings();	/* don't trace print latency */
		call_console_drivers(ext_text, ext_len, text, len);
		start_critical_timings();

		if (console_lock_spinning_disable_and_check()) {
			printk_safe_exit_irqrestore(flags);
			return;
		}

		printk_safe_exit_irqrestore(flags);

		if (do_cond_resched)
			cond_resched();
	}

	console_locked = 0;

	up_console_sem();

	/*
	 * Someone could have filled up the buffer again, so re-check if there's
	 * something to flush. In case we cannot trylock the console_sem again,
	 * there's a new owner and the console_unlock() from them will do the
	 * flush, no worries.
	 */
	retry = prb_read_valid(prb, console_seq, NULL);
	printk_safe_exit_irqrestore(flags);

	if (retry && console_trylock())
		goto again;
}
EXPORT_SYMBOL(console_unlock);

/**
 * console_conditional_schedule - yield the CPU if required
 *
 * If the console code is currently allowed to sleep, and
 * if this CPU should yield the CPU to another task, do
 * so here.
 *
 * Must be called within console_lock().
 */
void __sched console_conditional_schedule(void)
{
	if (console_may_schedule)
		cond_resched();
}
EXPORT_SYMBOL(console_conditional_schedule);

void console_unblank(void)
{
	struct console *c;

	/*
	 * console_unblank can no longer be called in interrupt context unless
	 * oops_in_progress is set to 1.
	 */
	if (oops_in_progress) {
		if (down_trylock_console_sem() != 0)
			return;
	} else
		console_lock();

	console_locked = 1;
	console_may_schedule = 0;
	for_each_console(c)
		if ((c->flags & CON_ENABLED) && c->unblank)
			c->unblank();
	console_unlock();
}

/**
 * console_flush_on_panic - flush console content on panic
 * @mode: flush all messages in buffer or just the pending ones
 *
 * Immediately output all pending messages no matter what.
 */
void console_flush_on_panic(enum con_flush_mode mode)
{
	/*
	 * If someone else is holding the console lock, trylock will fail
	 * and may_schedule may be set. Ignore and proceed to unlock so
	 * that messages are flushed out. As this can be called from any
	 * context and we don't want to get preempted while flushing,
	 * ensure may_schedule is cleared.
	 */
	console_trylock();
	console_may_schedule = 0;

	if (mode == CONSOLE_REPLAY_ALL) {
		unsigned long flags;

		printk_safe_enter_irqsave(flags);
		console_seq = prb_first_valid_seq(prb);
		printk_safe_exit_irqrestore(flags);
	}
	console_unlock();
}

/*
 * Return the console tty driver structure and its associated index
 */
struct tty_driver *console_device(int *index)
{
	struct console *c;
	struct tty_driver *driver = NULL;

	console_lock();
	for_each_console(c) {
		if (!c->device)
			continue;
		driver = c->device(c, index);
		if (driver)
			break;
	}
	console_unlock();
	return driver;
}

/*
 * Prevent further output on the passed console device so that (for example)
 * serial drivers can disable console output before suspending a port, and can
 * re-enable output afterwards.
 */
void console_stop(struct console *console)
{
	console_lock();
	console->flags &= ~CON_ENABLED;
	console_unlock();
}
EXPORT_SYMBOL(console_stop);

void console_start(struct console *console)
{
	console_lock();
	console->flags |= CON_ENABLED;
	console_unlock();
}
EXPORT_SYMBOL(console_start);
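
/*
 * Example (sketch; driver names are invented): a serial driver can
 * bracket the section that suspends its port:
 *
 *	console_stop(&my_uart_console);
 *	my_uart_suspend_port(port);
 *	...
 *	my_uart_resume_port(port);
 *	console_start(&my_uart_console);
 */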

static int __read_mostly keep_bootcon;

static int __init keep_bootcon_setup(char *str)
{
	keep_bootcon = 1;
	pr_info("debug: skip boot console de-registration.\n");

	return 0;
}

early_param("keep_bootcon", keep_bootcon_setup);

/*
 * This is called by register_console() to try to match
 * the newly registered console with any of the ones selected
 * by either the command line or add_preferred_console() and
 * setup/enable it.
 *
 * Care needs to be taken with consoles that are statically
 * enabled such as netconsole.
 */
static int try_enable_new_console(struct console *newcon, bool user_specified)
{
	struct console_cmdline *c;
	int i, err;

	for (i = 0, c = console_cmdline;
	     i < MAX_CMDLINECONSOLES && c->name[0];
	     i++, c++) {
		if (c->user_specified != user_specified)
			continue;
		if (!newcon->match ||
		    newcon->match(newcon, c->name, c->index, c->options) != 0) {
			/* default matching */
			BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
			if (strcmp(c->name, newcon->name) != 0)
				continue;
			if (newcon->index >= 0 &&
			    newcon->index != c->index)
				continue;
			if (newcon->index < 0)
				newcon->index = c->index;

			if (_braille_register_console(newcon, c))
				return 0;

			if (newcon->setup &&
			    (err = newcon->setup(newcon, c->options)) != 0)
				return err;
		}
		newcon->flags |= CON_ENABLED;
		if (i == preferred_console) {
			newcon->flags |= CON_CONSDEV;
			has_preferred_console = true;
		}
		return 0;
	}

	/*
	 * Some consoles, such as pstore and netconsole, can be enabled even
	 * without matching. Accept the pre-enabled consoles only when match()
	 * and setup() had a chance to be called.
	 */
	if (newcon->flags & CON_ENABLED && c->user_specified == user_specified)
		return 0;

	return -ENOENT;
}

/*
 * The console driver calls this routine during kernel initialization
 * to register the console printing procedure with printk() and to
 * print any messages that were printed by the kernel before the
 * console driver was initialized.
 *
 * This can happen pretty early during the boot process (because of
 * early_printk) - sometimes before setup_arch() completes - be careful
 * of what kernel features are used - they may not be initialised yet.
 *
 * There are two types of consoles - bootconsoles (early_printk) and
 * "real" consoles (everything which is not a bootconsole) which are
 * handled differently.
 * - Any number of bootconsoles can be registered at any time.
 * - As soon as a "real" console is registered, all bootconsoles
 *   will be unregistered automatically.
 * - Once a "real" console is registered, any attempt to register a
 *   bootconsole will be rejected.
 */
void register_console(struct console *newcon)
{
	unsigned long flags;
	struct console *bcon = NULL;
	int err;

	for_each_console(bcon) {
		if (WARN(bcon == newcon, "console '%s%d' already registered\n",
			 bcon->name, bcon->index))
			return;
	}

	/*
	 * before we register a new CON_BOOT console, make sure we don't
	 * already have a valid console
	 */
	if (newcon->flags & CON_BOOT) {
		for_each_console(bcon) {
			if (!(bcon->flags & CON_BOOT)) {
				pr_info("Too late to register bootconsole %s%d\n",
					newcon->name, newcon->index);
				return;
			}
		}
	}

	if (console_drivers && console_drivers->flags & CON_BOOT)
		bcon = console_drivers;

	if (!has_preferred_console || bcon || !console_drivers)
		has_preferred_console = preferred_console >= 0;

	/*
	 * See if we want to use this console driver. If we
	 * didn't select a console we take the first one
	 * that registers here.
	 */
	if (!has_preferred_console) {
		if (newcon->index < 0)
			newcon->index = 0;
		if (newcon->setup == NULL ||
		    newcon->setup(newcon, NULL) == 0) {
			newcon->flags |= CON_ENABLED;
			if (newcon->device) {
				newcon->flags |= CON_CONSDEV;
				has_preferred_console = true;
			}
		}
	}

	/* See if this console matches one we selected on the command line */
	err = try_enable_new_console(newcon, true);

	/* If not, try to match against the platform default(s) */
	if (err == -ENOENT)
		err = try_enable_new_console(newcon, false);

	/* printk() messages are not printed to the Braille console. */
	if (err || newcon->flags & CON_BRL)
		return;

	/*
	 * If we have a bootconsole, and are switching to a real console,
	 * don't print everything out again, since when the boot console
	 * and the real console are the same physical device, it's annoying
	 * to see the beginning boot messages twice.
	 */
	if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV))
		newcon->flags &= ~CON_PRINTBUFFER;

	/*
	 * Put this console in the list - keep the
	 * preferred driver at the head of the list.
	 */
	console_lock();
	if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {
		newcon->next = console_drivers;
		console_drivers = newcon;
		if (newcon->next)
			newcon->next->flags &= ~CON_CONSDEV;
		/* Ensure this flag is always set for the head of the list */
		newcon->flags |= CON_CONSDEV;
	} else {
		newcon->next = console_drivers->next;
		console_drivers->next = newcon;
	}

	if (newcon->flags & CON_EXTENDED)
		nr_ext_console_drivers++;

	if (newcon->flags & CON_PRINTBUFFER) {
		/*
		 * console_unlock() will print out the buffered messages
		 * for us.
		 *
		 * We're about to replay the log buffer. Only do this to the
		 * just-registered console to avoid excessive message spam to
		 * the already-registered consoles.
		 *
		 * Set exclusive_console with disabled interrupts to reduce
		 * race window with eventual console_flush_on_panic() that
		 * ignores console_lock.
		 */
		exclusive_console = newcon;
		exclusive_console_stop_seq = console_seq;

		/* Get a consistent copy of @syslog_seq. */
		raw_spin_lock_irqsave(&syslog_lock, flags);
		console_seq = syslog_seq;
		raw_spin_unlock_irqrestore(&syslog_lock, flags);
	}
	console_unlock();
	console_sysfs_notify();

	/*
	 * By unregistering the bootconsoles after we enable the real console
	 * we get the "console xxx enabled" message on all the consoles -
	 * boot consoles, real consoles, etc - this is to ensure that end
	 * users know there might be something in the kernel's log buffer that
	 * went to the bootconsole (that they do not see on the real console)
	 */
	pr_info("%sconsole [%s%d] enabled\n",
		(newcon->flags & CON_BOOT) ? "boot" : "",
		newcon->name, newcon->index);
	if (bcon &&
	    ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) &&
	    !keep_bootcon) {
		/*
		 * We need to iterate through all boot consoles, to make
		 * sure we print everything out, before we unregister them.
		 */
		for_each_console(bcon)
			if (bcon->flags & CON_BOOT)
				unregister_console(bcon);
	}
}
EXPORT_SYMBOL(register_console);
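
/*
 * Illustrative sketch of a minimal registration; every name here is
 * invented and the write callback is assumed to be callable from any
 * context:
 *
 *	static void my_console_write(struct console *con, const char *s,
 *				     unsigned int n)
 *	{
 *		my_hw_putchars(s, n);
 *	}
 *
 *	static struct console my_console = {
 *		.name	= "mycon",
 *		.write	= my_console_write,
 *		.flags	= CON_PRINTBUFFER,
 *		.index	= -1,
 *	};
 *
 *	register_console(&my_console);
 *
 * CON_PRINTBUFFER requests a replay of the existing log buffer on the
 * new console; index -1 lets the matching code above pick the index.
 */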

int unregister_console(struct console *console)
{
	struct console *con;
	int res;

	pr_info("%sconsole [%s%d] disabled\n",
		(console->flags & CON_BOOT) ? "boot" : "",
		console->name, console->index);

	res = _braille_unregister_console(console);
	if (res < 0)
		return res;
	if (res > 0)
		return 0;

	res = -ENODEV;
	console_lock();
	if (console_drivers == console) {
		console_drivers = console->next;
		res = 0;
	} else {
		for_each_console(con) {
			if (con->next == console) {
				con->next = console->next;
				res = 0;
				break;
			}
		}
	}

	if (res)
		goto out_disable_unlock;

	if (console->flags & CON_EXTENDED)
		nr_ext_console_drivers--;

	/*
	 * If this isn't the last console and it has CON_CONSDEV set, we
	 * need to set it on the next preferred console.
	 */
	if (console_drivers != NULL && console->flags & CON_CONSDEV)
		console_drivers->flags |= CON_CONSDEV;

	console->flags &= ~CON_ENABLED;
	console_unlock();
	console_sysfs_notify();

	if (console->exit)
		res = console->exit(console);

	return res;

out_disable_unlock:
	console->flags &= ~CON_ENABLED;
	console_unlock();

	return res;
}
EXPORT_SYMBOL(unregister_console);

/*
 * Initialize the console device. This is called *early*, so
 * we can't necessarily depend on lots of kernel help here.
 * Just do some early initializations, and do the complex setup
 * later.
 */
void __init console_init(void)
{
	int ret;
	initcall_t call;
	initcall_entry_t *ce;

	/* Setup the default TTY line discipline. */
	n_tty_init();

	/*
	 * set up the console device so that later boot sequences can
	 * inform about problems etc.
	 */
	ce = __con_initcall_start;
	trace_initcall_level("console");
	while (ce < __con_initcall_end) {
		call = initcall_from_entry(ce);
		trace_initcall_start(call);
		ret = call();
		trace_initcall_finish(call, ret);
		ce++;
	}
}

/*
 * Some boot consoles access data that is in the init section and which will
 * be discarded after the initcalls have been run. To make sure that no code
 * will access this data, unregister the boot consoles in a late initcall.
 *
 * If for some reason, such as deferred probe or the driver being a loadable
 * module, the real console hasn't registered yet at this point, there will
 * be a brief interval in which no messages are logged to the console, which
 * makes it difficult to diagnose problems that occur during this time.
 *
 * To mitigate this problem somewhat, only unregister consoles whose memory
 * intersects with the init section. Note that all other boot consoles will
 * get unregistered when the real preferred console is registered.
 */
static int __init printk_late_init(void)
{
	struct console *con;
	int ret;

	for_each_console(con) {
		if (!(con->flags & CON_BOOT))
			continue;

		/* Check addresses that might be used for enabled consoles. */
		if (init_section_intersects(con, sizeof(*con)) ||
		    init_section_contains(con->write, 0) ||
		    init_section_contains(con->read, 0) ||
		    init_section_contains(con->device, 0) ||
		    init_section_contains(con->unblank, 0) ||
		    init_section_contains(con->data, 0)) {
			/*
			 * Please, consider moving the reported consoles out
			 * of the init section.
			 */
			pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n",
				con->name, con->index);
			unregister_console(con);
		}
	}
	ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
					console_cpu_notify);
	WARN_ON(ret < 0);
	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
					console_cpu_notify, NULL);
	WARN_ON(ret < 0);
	return 0;
}
late_initcall(printk_late_init);

#if defined CONFIG_PRINTK
/*
 * Delayed printk version, for scheduler-internal messages:
 */
#define PRINTK_PENDING_WAKEUP	0x01
#define PRINTK_PENDING_OUTPUT	0x02

static DEFINE_PER_CPU(int, printk_pending);

static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
	int pending = __this_cpu_xchg(printk_pending, 0);

	if (pending & PRINTK_PENDING_OUTPUT) {
		/* If trylock fails, someone else is doing the printing */
		if (console_trylock())
			console_unlock();
	}

	if (pending & PRINTK_PENDING_WAKEUP)
		wake_up_interruptible(&log_wait);
}

static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
	IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func);

void wake_up_klogd(void)
{
	if (!printk_percpu_data_ready())
		return;

	preempt_disable();
	if (waitqueue_active(&log_wait)) {
		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
		irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
	}
	preempt_enable();
}

void defer_console_output(void)
{
	if (!printk_percpu_data_ready())
		return;

	preempt_disable();
	__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
	irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
	preempt_enable();
}

int vprintk_deferred(const char *fmt, va_list args)
{
	int r;

	r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
	defer_console_output();

	return r;
}

int printk_deferred(const char *fmt, ...)
{
	va_list args;
	int r;

	va_start(args, fmt);
	r = vprintk_deferred(fmt, args);
	va_end(args);

	return r;
}
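
/*
 * Example (sketch): code running under scheduler locks must not take
 * console_sem via printk(), so it defers the console hand-off:
 *
 *	printk_deferred(KERN_WARNING "runqueue imbalance on CPU %d\n", cpu);
 *
 * The message is stored immediately; the actual printing happens later
 * from the irq_work in wake_up_klogd_work_func() above.
 */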

/*
 * printk rate limiting, lifted from the networking subsystem.
 *
 * This enforces a rate limit: not more than 10 kernel messages
 * every 5s to make a denial-of-service attack impossible.
 */
DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);

int __printk_ratelimit(const char *func)
{
	return ___ratelimit(&printk_ratelimit_state, func);
}
EXPORT_SYMBOL(__printk_ratelimit);
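
/*
 * Example usage (sketch) via the printk_ratelimit() wrapper from
 * <linux/printk.h>, which supplies __func__ for the suppression
 * report:
 *
 *	if (printk_ratelimit())
 *		printk(KERN_NOTICE "flood-prone event\n");
 */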

/**
 * printk_timed_ratelimit - caller-controlled printk ratelimiting
 * @caller_jiffies: pointer to caller's state
 * @interval_msecs: minimum interval between prints
 *
 * printk_timed_ratelimit() returns true if more than @interval_msecs
 * milliseconds have elapsed since the last time printk_timed_ratelimit()
 * returned true.
 */
bool printk_timed_ratelimit(unsigned long *caller_jiffies,
			    unsigned int interval_msecs)
{
	unsigned long elapsed = jiffies - *caller_jiffies;

	if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
		return false;

	*caller_jiffies = jiffies;
	return true;
}
EXPORT_SYMBOL(printk_timed_ratelimit);
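
/*
 * Example usage (sketch): the caller owns the timestamp, so separate
 * call sites are limited independently:
 *
 *	static unsigned long last_msg;
 *
 *	if (printk_timed_ratelimit(&last_msg, 1000))
 *		pr_warn("sensor read failed\n");
 */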

static DEFINE_SPINLOCK(dump_list_lock);
static LIST_HEAD(dump_list);

/**
 * kmsg_dump_register - register a kernel log dumper.
 * @dumper: pointer to the kmsg_dumper structure
 *
 * Adds a kernel log dumper to the system. The dump callback in the
 * structure will be called when the kernel oopses or panics and must be
 * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
 */
int kmsg_dump_register(struct kmsg_dumper *dumper)
{
	unsigned long flags;
	int err = -EBUSY;

	/* The dump callback needs to be set */
	if (!dumper->dump)
		return -EINVAL;

	spin_lock_irqsave(&dump_list_lock, flags);
	/* Don't allow registering multiple times */
	if (!dumper->registered) {
		dumper->registered = 1;
		list_add_tail_rcu(&dumper->list, &dump_list);
		err = 0;
	}
	spin_unlock_irqrestore(&dump_list_lock, flags);

	return err;
}
EXPORT_SYMBOL_GPL(kmsg_dump_register);

/**
 * kmsg_dump_unregister - unregister a kmsg dumper.
 * @dumper: pointer to the kmsg_dumper structure
 *
 * Removes a dump device from the system. Returns zero on success and
 * %-EINVAL otherwise.
 */
int kmsg_dump_unregister(struct kmsg_dumper *dumper)
{
	unsigned long flags;
	int err = -EINVAL;

	spin_lock_irqsave(&dump_list_lock, flags);
	if (dumper->registered) {
		dumper->registered = 0;
		list_del_rcu(&dumper->list);
		err = 0;
	}
	spin_unlock_irqrestore(&dump_list_lock, flags);
	synchronize_rcu();

	return err;
}
EXPORT_SYMBOL_GPL(kmsg_dump_unregister);

static bool always_kmsg_dump;
module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);

const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
{
	switch (reason) {
	case KMSG_DUMP_PANIC:
		return "Panic";
	case KMSG_DUMP_OOPS:
		return "Oops";
	case KMSG_DUMP_EMERG:
		return "Emergency";
	case KMSG_DUMP_SHUTDOWN:
		return "Shutdown";
	default:
		return "Unknown";
	}
}
EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);

/**
 * kmsg_dump - dump kernel log to kernel message dumpers.
 * @reason: the reason (oops, panic etc) for dumping
 *
 * Call each registered dumper's dump() callback, which can
 * retrieve the kmsg records with kmsg_dump_get_line() or
 * kmsg_dump_get_buffer().
 */
void kmsg_dump(enum kmsg_dump_reason reason)
{
	struct kmsg_dumper *dumper;

	rcu_read_lock();
	list_for_each_entry_rcu(dumper, &dump_list, list) {
		enum kmsg_dump_reason max_reason = dumper->max_reason;

		/*
		 * If client has not provided a specific max_reason, default
		 * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set.
		 */
		if (max_reason == KMSG_DUMP_UNDEF) {
			max_reason = always_kmsg_dump ? KMSG_DUMP_MAX :
							KMSG_DUMP_OOPS;
		}
		if (reason > max_reason)
			continue;

		/* invoke dumper which will iterate over records */
		dumper->dump(dumper, reason);
	}
	rcu_read_unlock();
}

/**
 * kmsg_dump_get_line - retrieve one kmsg log line
 * @iter: kmsg dump iterator
 * @syslog: include the "<4>" prefixes
 * @line: buffer to copy the line to
 * @size: maximum size of the buffer
 * @len: length of line placed into buffer
 *
 * Start at the beginning of the kmsg buffer, with the oldest kmsg
 * record, and copy one record into the provided buffer.
 *
 * Consecutive calls will return the next available record moving
 * towards the end of the buffer with the youngest messages.
 *
 * A return value of FALSE indicates that there are no more records to
 * read.
 */
bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog,
			char *line, size_t size, size_t *len)
{
	u64 min_seq = latched_seq_read_nolock(&clear_seq);
	struct printk_info info;
	unsigned int line_count;
	struct printk_record r;
	unsigned long flags;
	size_t l = 0;
	bool ret = false;

	if (iter->cur_seq < min_seq)
		iter->cur_seq = min_seq;

	printk_safe_enter_irqsave(flags);
	prb_rec_init_rd(&r, &info, line, size);

	/* Read text or count text lines? */
	if (line) {
		if (!prb_read_valid(prb, iter->cur_seq, &r))
			goto out;
		l = record_print_text(&r, syslog, printk_time);
	} else {
		if (!prb_read_valid_info(prb, iter->cur_seq,
					 &info, &line_count)) {
			goto out;
		}
		l = get_record_print_text_size(&info, line_count, syslog,
					       printk_time);
	}

	iter->cur_seq = r.info->seq + 1;
	ret = true;
out:
	printk_safe_exit_irqrestore(flags);
	if (len)
		*len = l;
	return ret;
}
EXPORT_SYMBOL_GPL(kmsg_dump_get_line);

/**
 * kmsg_dump_get_buffer - copy kmsg log lines
 * @iter: kmsg dump iterator
 * @syslog: include the "<4>" prefixes
 * @buf: buffer to copy the lines to
 * @size: maximum size of the buffer
 * @len_out: length of line placed into buffer
 *
 * Start at the end of the kmsg buffer and fill the provided buffer
 * with as many of the *youngest* kmsg records that fit into it.
 * If the buffer is large enough, all available kmsg records will be
 * copied with a single call.
 *
 * Consecutive calls will fill the buffer with the next block of
 * available older records, not including the earlier retrieved ones.
 *
 * A return value of FALSE indicates that there are no more records to
 * read.
 */
bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
			  char *buf, size_t size, size_t *len_out)
{
	u64 min_seq = latched_seq_read_nolock(&clear_seq);
	struct printk_info info;
	struct printk_record r;
	unsigned long flags;
	u64 seq;
	u64 next_seq;
	size_t len = 0;
	bool ret = false;
	bool time = printk_time;

	if (!buf || !size)
		goto out;

	if (iter->cur_seq < min_seq)
		iter->cur_seq = min_seq;

	printk_safe_enter_irqsave(flags);
	if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) {
		if (info.seq != iter->cur_seq) {
			/* messages are gone, move to first available one */
			iter->cur_seq = info.seq;
		}
	}

	/* last entry */
	if (iter->cur_seq >= iter->next_seq) {
		printk_safe_exit_irqrestore(flags);
		goto out;
	}

	/*
	 * Find first record that fits, including all following records,
	 * into the user-provided buffer for this dump. Pass in size-1
	 * because this function (by way of record_print_text()) will
	 * not write more than size-1 bytes of text into @buf.
	 */
	seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq,
				     size - 1, syslog, time);

	/*
	 * Next kmsg_dump_get_buffer() invocation will dump block of
	 * older records stored right before this one.
	 */
	next_seq = seq;

	prb_rec_init_rd(&r, &info, buf, size);

	len = 0;
	prb_for_each_record(seq, prb, seq, &r) {
		if (r.info->seq >= iter->next_seq)
			break;

		len += record_print_text(&r, syslog, time);

		/* Adjust record to store to remaining buffer space. */
		prb_rec_init_rd(&r, &info, buf + len, size - len);
	}

	iter->next_seq = next_seq;
	ret = true;
	printk_safe_exit_irqrestore(flags);
out:
	if (len_out)
		*len_out = len;
	return ret;
}
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);

/**
 * kmsg_dump_rewind - reset the iterator
 * @iter: kmsg dump iterator
 *
 * Reset the dumper's iterator so that kmsg_dump_get_line() and
 * kmsg_dump_get_buffer() can be called again and used multiple
 * times within the same dumper.dump() callback.
 */
void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
{
	unsigned long flags;

	printk_safe_enter_irqsave(flags);
	iter->cur_seq = latched_seq_read_nolock(&clear_seq);
	iter->next_seq = prb_next_seq(prb);
	printk_safe_exit_irqrestore(flags);
}
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
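
/*
 * Illustrative sketch of a dumper tying the above together; the names
 * are invented and my_emit_line() stands in for writing to some
 * persistent store:
 *
 *	static void my_dump(struct kmsg_dumper *dumper,
 *			    enum kmsg_dump_reason reason)
 *	{
 *		struct kmsg_dump_iter iter;
 *		char line[256];
 *		size_t len;
 *
 *		kmsg_dump_rewind(&iter);
 *		while (kmsg_dump_get_line(&iter, true, line, sizeof(line), &len))
 *			my_emit_line(line, len);
 *	}
 *
 *	static struct kmsg_dumper my_dumper = {
 *		.dump		= my_dump,
 *		.max_reason	= KMSG_DUMP_PANIC,
 *	};
 *
 *	kmsg_dump_register(&my_dumper);
 */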

#endif /* CONFIG_PRINTK */