1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/kernel/printk.c 4 * 5 * Copyright (C) 1991, 1992 Linus Torvalds 6 * 7 * Modified to make sys_syslog() more flexible: added commands to 8 * return the last 4k of kernel messages, regardless of whether 9 * they've been read or not. Added option to suppress kernel printk's 10 * to the console. Added hook for sending the console messages 11 * elsewhere, in preparation for a serial line console (someday). 12 * Ted Ts'o, 2/11/93. 13 * Modified for sysctl support, 1/8/97, Chris Horn. 14 * Fixed SMP synchronization, 08/08/99, Manfred Spraul 15 * manfred@colorfullife.com 16 * Rewrote bits to get rid of console_lock 17 * 01Mar01 Andrew Morton 18 */ 19 20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 21 22 #include <linux/kernel.h> 23 #include <linux/mm.h> 24 #include <linux/tty.h> 25 #include <linux/tty_driver.h> 26 #include <linux/console.h> 27 #include <linux/init.h> 28 #include <linux/jiffies.h> 29 #include <linux/nmi.h> 30 #include <linux/module.h> 31 #include <linux/moduleparam.h> 32 #include <linux/delay.h> 33 #include <linux/smp.h> 34 #include <linux/security.h> 35 #include <linux/memblock.h> 36 #include <linux/syscalls.h> 37 #include <linux/syscore_ops.h> 38 #include <linux/vmcore_info.h> 39 #include <linux/ratelimit.h> 40 #include <linux/kmsg_dump.h> 41 #include <linux/syslog.h> 42 #include <linux/cpu.h> 43 #include <linux/rculist.h> 44 #include <linux/poll.h> 45 #include <linux/irq_work.h> 46 #include <linux/ctype.h> 47 #include <linux/uio.h> 48 #include <linux/sched/clock.h> 49 #include <linux/sched/debug.h> 50 #include <linux/sched/task_stack.h> 51 52 #include <linux/uaccess.h> 53 #include <asm/sections.h> 54 55 #include <trace/events/initcall.h> 56 #define CREATE_TRACE_POINTS 57 #include <trace/events/printk.h> 58 59 #include "printk_ringbuffer.h" 60 #include "console_cmdline.h" 61 #include "braille.h" 62 #include "internal.h" 63 64 int console_printk[4] = { 65 CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel 
*/ 66 MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ 67 CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ 68 CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ 69 }; 70 EXPORT_SYMBOL_GPL(console_printk); 71 72 atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); 73 EXPORT_SYMBOL(ignore_console_lock_warning); 74 75 EXPORT_TRACEPOINT_SYMBOL_GPL(console); 76 77 /* 78 * Low level drivers may need that to know if they can schedule in 79 * their unblank() callback or not. So let's export it. 80 */ 81 int oops_in_progress; 82 EXPORT_SYMBOL(oops_in_progress); 83 84 /* 85 * console_mutex protects console_list updates and console->flags updates. 86 * The flags are synchronized only for consoles that are registered, i.e. 87 * accessible via the console list. 88 */ 89 static DEFINE_MUTEX(console_mutex); 90 91 /* 92 * console_sem protects updates to console->seq 93 * and also provides serialization for console printing. 94 */ 95 static DEFINE_SEMAPHORE(console_sem, 1); 96 HLIST_HEAD(console_list); 97 EXPORT_SYMBOL_GPL(console_list); 98 DEFINE_STATIC_SRCU(console_srcu); 99 100 /* 101 * System may need to suppress printk message under certain 102 * circumstances, like after kernel panic happens. 
103 */ 104 int __read_mostly suppress_printk; 105 106 #ifdef CONFIG_LOCKDEP 107 static struct lockdep_map console_lock_dep_map = { 108 .name = "console_lock" 109 }; 110 111 void lockdep_assert_console_list_lock_held(void) 112 { 113 lockdep_assert_held(&console_mutex); 114 } 115 EXPORT_SYMBOL(lockdep_assert_console_list_lock_held); 116 #endif 117 118 #ifdef CONFIG_DEBUG_LOCK_ALLOC 119 bool console_srcu_read_lock_is_held(void) 120 { 121 return srcu_read_lock_held(&console_srcu); 122 } 123 EXPORT_SYMBOL(console_srcu_read_lock_is_held); 124 #endif 125 126 enum devkmsg_log_bits { 127 __DEVKMSG_LOG_BIT_ON = 0, 128 __DEVKMSG_LOG_BIT_OFF, 129 __DEVKMSG_LOG_BIT_LOCK, 130 }; 131 132 enum devkmsg_log_masks { 133 DEVKMSG_LOG_MASK_ON = BIT(__DEVKMSG_LOG_BIT_ON), 134 DEVKMSG_LOG_MASK_OFF = BIT(__DEVKMSG_LOG_BIT_OFF), 135 DEVKMSG_LOG_MASK_LOCK = BIT(__DEVKMSG_LOG_BIT_LOCK), 136 }; 137 138 /* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */ 139 #define DEVKMSG_LOG_MASK_DEFAULT 0 140 141 static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; 142 143 static int __control_devkmsg(char *str) 144 { 145 size_t len; 146 147 if (!str) 148 return -EINVAL; 149 150 len = str_has_prefix(str, "on"); 151 if (len) { 152 devkmsg_log = DEVKMSG_LOG_MASK_ON; 153 return len; 154 } 155 156 len = str_has_prefix(str, "off"); 157 if (len) { 158 devkmsg_log = DEVKMSG_LOG_MASK_OFF; 159 return len; 160 } 161 162 len = str_has_prefix(str, "ratelimit"); 163 if (len) { 164 devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; 165 return len; 166 } 167 168 return -EINVAL; 169 } 170 171 static int __init control_devkmsg(char *str) 172 { 173 if (__control_devkmsg(str) < 0) { 174 pr_warn("printk.devkmsg: bad option string '%s'\n", str); 175 return 1; 176 } 177 178 /* 179 * Set sysctl string accordingly: 180 */ 181 if (devkmsg_log == DEVKMSG_LOG_MASK_ON) 182 strscpy(devkmsg_log_str, "on"); 183 else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) 184 strscpy(devkmsg_log_str, "off"); 185 /* else 
"ratelimit" which is set by default. */ 186 187 /* 188 * Sysctl cannot change it anymore. The kernel command line setting of 189 * this parameter is to force the setting to be permanent throughout the 190 * runtime of the system. This is a precation measure against userspace 191 * trying to be a smarta** and attempting to change it up on us. 192 */ 193 devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; 194 195 return 1; 196 } 197 __setup("printk.devkmsg=", control_devkmsg); 198 199 char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; 200 #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) 201 int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, 202 void *buffer, size_t *lenp, loff_t *ppos) 203 { 204 char old_str[DEVKMSG_STR_MAX_SIZE]; 205 unsigned int old; 206 int err; 207 208 if (write) { 209 if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK) 210 return -EINVAL; 211 212 old = devkmsg_log; 213 strscpy(old_str, devkmsg_log_str); 214 } 215 216 err = proc_dostring(table, write, buffer, lenp, ppos); 217 if (err) 218 return err; 219 220 if (write) { 221 err = __control_devkmsg(devkmsg_log_str); 222 223 /* 224 * Do not accept an unknown string OR a known string with 225 * trailing crap... 226 */ 227 if (err < 0 || (err + 1 != *lenp)) { 228 229 /* ... and restore old setting. */ 230 devkmsg_log = old; 231 strscpy(devkmsg_log_str, old_str); 232 233 return -EINVAL; 234 } 235 } 236 237 return 0; 238 } 239 #endif /* CONFIG_PRINTK && CONFIG_SYSCTL */ 240 241 /** 242 * console_list_lock - Lock the console list 243 * 244 * For console list or console->flags updates 245 */ 246 void console_list_lock(void) 247 { 248 /* 249 * In unregister_console() and console_force_preferred_locked(), 250 * synchronize_srcu() is called with the console_list_lock held. 251 * Therefore it is not allowed that the console_list_lock is taken 252 * with the srcu_lock held. 
253 * 254 * Detecting if this context is really in the read-side critical 255 * section is only possible if the appropriate debug options are 256 * enabled. 257 */ 258 WARN_ON_ONCE(debug_lockdep_rcu_enabled() && 259 srcu_read_lock_held(&console_srcu)); 260 261 mutex_lock(&console_mutex); 262 } 263 EXPORT_SYMBOL(console_list_lock); 264 265 /** 266 * console_list_unlock - Unlock the console list 267 * 268 * Counterpart to console_list_lock() 269 */ 270 void console_list_unlock(void) 271 { 272 mutex_unlock(&console_mutex); 273 } 274 EXPORT_SYMBOL(console_list_unlock); 275 276 /** 277 * console_srcu_read_lock - Register a new reader for the 278 * SRCU-protected console list 279 * 280 * Use for_each_console_srcu() to iterate the console list 281 * 282 * Context: Any context. 283 * Return: A cookie to pass to console_srcu_read_unlock(). 284 */ 285 int console_srcu_read_lock(void) 286 __acquires(&console_srcu) 287 { 288 return srcu_read_lock_nmisafe(&console_srcu); 289 } 290 EXPORT_SYMBOL(console_srcu_read_lock); 291 292 /** 293 * console_srcu_read_unlock - Unregister an old reader from 294 * the SRCU-protected console list 295 * @cookie: cookie returned from console_srcu_read_lock() 296 * 297 * Counterpart to console_srcu_read_lock() 298 */ 299 void console_srcu_read_unlock(int cookie) 300 __releases(&console_srcu) 301 { 302 srcu_read_unlock_nmisafe(&console_srcu, cookie); 303 } 304 EXPORT_SYMBOL(console_srcu_read_unlock); 305 306 /* 307 * Helper macros to handle lockdep when locking/unlocking console_sem. We use 308 * macros instead of functions so that _RET_IP_ contains useful information. 
309 */ 310 #define down_console_sem() do { \ 311 down(&console_sem);\ 312 mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\ 313 } while (0) 314 315 static int __down_trylock_console_sem(unsigned long ip) 316 { 317 int lock_failed; 318 unsigned long flags; 319 320 /* 321 * Here and in __up_console_sem() we need to be in safe mode, 322 * because spindump/WARN/etc from under console ->lock will 323 * deadlock in printk()->down_trylock_console_sem() otherwise. 324 */ 325 printk_safe_enter_irqsave(flags); 326 lock_failed = down_trylock(&console_sem); 327 printk_safe_exit_irqrestore(flags); 328 329 if (lock_failed) 330 return 1; 331 mutex_acquire(&console_lock_dep_map, 0, 1, ip); 332 return 0; 333 } 334 #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) 335 336 static void __up_console_sem(unsigned long ip) 337 { 338 unsigned long flags; 339 340 mutex_release(&console_lock_dep_map, ip); 341 342 printk_safe_enter_irqsave(flags); 343 up(&console_sem); 344 printk_safe_exit_irqrestore(flags); 345 } 346 #define up_console_sem() __up_console_sem(_RET_IP_) 347 348 static bool panic_in_progress(void) 349 { 350 return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); 351 } 352 353 /* Return true if a panic is in progress on the current CPU. */ 354 bool this_cpu_in_panic(void) 355 { 356 /* 357 * We can use raw_smp_processor_id() here because it is impossible for 358 * the task to be migrated to the panic_cpu, or away from it. If 359 * panic_cpu has already been set, and we're not currently executing on 360 * that CPU, then we never will be. 361 */ 362 return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); 363 } 364 365 /* 366 * Return true if a panic is in progress on a remote CPU. 367 * 368 * On true, the local CPU should immediately release any printing resources 369 * that may be needed by the panic CPU. 
370 */ 371 bool other_cpu_in_panic(void) 372 { 373 return (panic_in_progress() && !this_cpu_in_panic()); 374 } 375 376 /* 377 * This is used for debugging the mess that is the VT code by 378 * keeping track if we have the console semaphore held. It's 379 * definitely not the perfect debug tool (we don't know if _WE_ 380 * hold it and are racing, but it helps tracking those weird code 381 * paths in the console code where we end up in places I want 382 * locked without the console semaphore held). 383 */ 384 static int console_locked; 385 386 /* 387 * Array of consoles built from command line options (console=) 388 */ 389 390 #define MAX_CMDLINECONSOLES 8 391 392 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; 393 394 static int preferred_console = -1; 395 int console_set_on_cmdline; 396 EXPORT_SYMBOL(console_set_on_cmdline); 397 398 /* Flag: console code may call schedule() */ 399 static int console_may_schedule; 400 401 enum con_msg_format_flags { 402 MSG_FORMAT_DEFAULT = 0, 403 MSG_FORMAT_SYSLOG = (1 << 0), 404 }; 405 406 static int console_msg_format = MSG_FORMAT_DEFAULT; 407 408 /* 409 * The printk log buffer consists of a sequenced collection of records, each 410 * containing variable length message text. Every record also contains its 411 * own meta-data (@info). 412 * 413 * Every record meta-data carries the timestamp in microseconds, as well as 414 * the standard userspace syslog level and syslog facility. The usual kernel 415 * messages use LOG_KERN; userspace-injected messages always carry a matching 416 * syslog facility, by default LOG_USER. The origin of every message can be 417 * reliably determined that way. 418 * 419 * The human readable log message of a record is available in @text, the 420 * length of the message text in @text_len. The stored message is not 421 * terminated. 
422 * 423 * Optionally, a record can carry a dictionary of properties (key/value 424 * pairs), to provide userspace with a machine-readable message context. 425 * 426 * Examples for well-defined, commonly used property names are: 427 * DEVICE=b12:8 device identifier 428 * b12:8 block dev_t 429 * c127:3 char dev_t 430 * n8 netdev ifindex 431 * +sound:card0 subsystem:devname 432 * SUBSYSTEM=pci driver-core subsystem name 433 * 434 * Valid characters in property names are [a-zA-Z0-9.-_]. Property names 435 * and values are terminated by a '\0' character. 436 * 437 * Example of record values: 438 * record.text_buf = "it's a line" (unterminated) 439 * record.info.seq = 56 440 * record.info.ts_nsec = 36863 441 * record.info.text_len = 11 442 * record.info.facility = 0 (LOG_KERN) 443 * record.info.flags = 0 444 * record.info.level = 3 (LOG_ERR) 445 * record.info.caller_id = 299 (task 299) 446 * record.info.dev_info.subsystem = "pci" (terminated) 447 * record.info.dev_info.device = "+pci:0000:00:01.0" (terminated) 448 * 449 * The 'struct printk_info' buffer must never be directly exported to 450 * userspace, it is a kernel-private implementation detail that might 451 * need to be changed in the future, when the requirements change. 452 * 453 * /dev/kmsg exports the structured data in the following line format: 454 * "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n" 455 * 456 * Users of the export format should ignore possible additional values 457 * separated by ',', and find the message after the ';' character. 458 * 459 * The optional key/value pairs are attached as continuation lines starting 460 * with a space character and terminated by a newline. All possible 461 * non-prinatable characters are escaped in the "\xff" notation. 462 */ 463 464 /* syslog_lock protects syslog_* variables and write access to clear_seq. */ 465 static DEFINE_MUTEX(syslog_lock); 466 467 /* 468 * Specifies if a legacy console is registered. 
If legacy consoles are 469 * present, it is necessary to perform the console lock/unlock dance 470 * whenever console flushing should occur. 471 */ 472 bool have_legacy_console; 473 474 /* 475 * Specifies if an nbcon console is registered. If nbcon consoles are present, 476 * synchronous printing of legacy consoles will not occur during panic until 477 * the backtrace has been stored to the ringbuffer. 478 */ 479 bool have_nbcon_console; 480 481 /* 482 * Specifies if a boot console is registered. If boot consoles are present, 483 * nbcon consoles cannot print simultaneously and must be synchronized by 484 * the console lock. This is because boot consoles and nbcon consoles may 485 * have mapped the same hardware. 486 */ 487 bool have_boot_console; 488 489 /* See printk_legacy_allow_panic_sync() for details. */ 490 bool legacy_allow_panic_sync; 491 492 #ifdef CONFIG_PRINTK 493 DECLARE_WAIT_QUEUE_HEAD(log_wait); 494 static DECLARE_WAIT_QUEUE_HEAD(legacy_wait); 495 /* All 3 protected by @syslog_lock. */ 496 /* the next printk record to read by syslog(READ) or /proc/kmsg */ 497 static u64 syslog_seq; 498 static size_t syslog_partial; 499 static bool syslog_time; 500 501 /* True when _all_ printer threads are available for printing. */ 502 bool printk_kthreads_running; 503 504 struct latched_seq { 505 seqcount_latch_t latch; 506 u64 val[2]; 507 }; 508 509 /* 510 * The next printk record to read after the last 'clear' command. There are 511 * two copies (updated with seqcount_latch) so that reads can locklessly 512 * access a valid value. Writers are synchronized by @syslog_lock. 
513 */ 514 static struct latched_seq clear_seq = { 515 .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), 516 .val[0] = 0, 517 .val[1] = 0, 518 }; 519 520 #define LOG_LEVEL(v) ((v) & 0x07) 521 #define LOG_FACILITY(v) ((v) >> 3 & 0xff) 522 523 /* record buffer */ 524 #define LOG_ALIGN __alignof__(unsigned long) 525 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 526 #define LOG_BUF_LEN_MAX (u32)(1 << 31) 527 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 528 static char *log_buf = __log_buf; 529 static u32 log_buf_len = __LOG_BUF_LEN; 530 531 /* 532 * Define the average message size. This only affects the number of 533 * descriptors that will be available. Underestimating is better than 534 * overestimating (too many available descriptors is better than not enough). 535 */ 536 #define PRB_AVGBITS 5 /* 32 character average length */ 537 538 #if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS 539 #error CONFIG_LOG_BUF_SHIFT value too small. 540 #endif 541 _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, 542 PRB_AVGBITS, &__log_buf[0]); 543 544 static struct printk_ringbuffer printk_rb_dynamic; 545 546 struct printk_ringbuffer *prb = &printk_rb_static; 547 548 /* 549 * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before 550 * per_cpu_areas are initialised. This variable is set to true when 551 * it's safe to access per-CPU data. 552 */ 553 static bool __printk_percpu_data_ready __ro_after_init; 554 555 bool printk_percpu_data_ready(void) 556 { 557 return __printk_percpu_data_ready; 558 } 559 560 /* Must be called under syslog_lock. */ 561 static void latched_seq_write(struct latched_seq *ls, u64 val) 562 { 563 raw_write_seqcount_latch(&ls->latch); 564 ls->val[0] = val; 565 raw_write_seqcount_latch(&ls->latch); 566 ls->val[1] = val; 567 } 568 569 /* Can be called from any context. 
*/ 570 static u64 latched_seq_read_nolock(struct latched_seq *ls) 571 { 572 unsigned int seq; 573 unsigned int idx; 574 u64 val; 575 576 do { 577 seq = raw_read_seqcount_latch(&ls->latch); 578 idx = seq & 0x1; 579 val = ls->val[idx]; 580 } while (raw_read_seqcount_latch_retry(&ls->latch, seq)); 581 582 return val; 583 } 584 585 /* Return log buffer address */ 586 char *log_buf_addr_get(void) 587 { 588 return log_buf; 589 } 590 591 /* Return log buffer size */ 592 u32 log_buf_len_get(void) 593 { 594 return log_buf_len; 595 } 596 597 /* 598 * Define how much of the log buffer we could take at maximum. The value 599 * must be greater than two. Note that only half of the buffer is available 600 * when the index points to the middle. 601 */ 602 #define MAX_LOG_TAKE_PART 4 603 static const char trunc_msg[] = "<truncated>"; 604 605 static void truncate_msg(u16 *text_len, u16 *trunc_msg_len) 606 { 607 /* 608 * The message should not take the whole buffer. Otherwise, it might 609 * get removed too soon. 610 */ 611 u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; 612 613 if (*text_len > max_text_len) 614 *text_len = max_text_len; 615 616 /* enable the warning message (if there is room) */ 617 *trunc_msg_len = strlen(trunc_msg); 618 if (*text_len >= *trunc_msg_len) 619 *text_len -= *trunc_msg_len; 620 else 621 *trunc_msg_len = 0; 622 } 623 624 int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); 625 626 static int syslog_action_restricted(int type) 627 { 628 if (dmesg_restrict) 629 return 1; 630 /* 631 * Unless restricted, we allow "read all" and "get buffer size" 632 * for everybody. 633 */ 634 return type != SYSLOG_ACTION_READ_ALL && 635 type != SYSLOG_ACTION_SIZE_BUFFER; 636 } 637 638 static int check_syslog_permissions(int type, int source) 639 { 640 /* 641 * If this is from /proc/kmsg and we've already opened it, then we've 642 * already done the capabilities checks at open time. 
643 */ 644 if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN) 645 goto ok; 646 647 if (syslog_action_restricted(type)) { 648 if (capable(CAP_SYSLOG)) 649 goto ok; 650 return -EPERM; 651 } 652 ok: 653 return security_syslog(type); 654 } 655 656 static void append_char(char **pp, char *e, char c) 657 { 658 if (*pp < e) 659 *(*pp)++ = c; 660 } 661 662 static ssize_t info_print_ext_header(char *buf, size_t size, 663 struct printk_info *info) 664 { 665 u64 ts_usec = info->ts_nsec; 666 char caller[20]; 667 #ifdef CONFIG_PRINTK_CALLER 668 u32 id = info->caller_id; 669 670 snprintf(caller, sizeof(caller), ",caller=%c%u", 671 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000); 672 #else 673 caller[0] = '\0'; 674 #endif 675 676 do_div(ts_usec, 1000); 677 678 return scnprintf(buf, size, "%u,%llu,%llu,%c%s;", 679 (info->facility << 3) | info->level, info->seq, 680 ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller); 681 } 682 683 static ssize_t msg_add_ext_text(char *buf, size_t size, 684 const char *text, size_t text_len, 685 unsigned char endc) 686 { 687 char *p = buf, *e = buf + size; 688 size_t i; 689 690 /* escape non-printable characters */ 691 for (i = 0; i < text_len; i++) { 692 unsigned char c = text[i]; 693 694 if (c < ' ' || c >= 127 || c == '\\') 695 p += scnprintf(p, e - p, "\\x%02x", c); 696 else 697 append_char(&p, e, c); 698 } 699 append_char(&p, e, endc); 700 701 return p - buf; 702 } 703 704 static ssize_t msg_add_dict_text(char *buf, size_t size, 705 const char *key, const char *val) 706 { 707 size_t val_len = strlen(val); 708 ssize_t len; 709 710 if (!val_len) 711 return 0; 712 713 len = msg_add_ext_text(buf, size, "", 0, ' '); /* dict prefix */ 714 len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '='); 715 len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n'); 716 717 return len; 718 } 719 720 static ssize_t msg_print_ext_body(char *buf, size_t size, 721 char *text, size_t text_len, 722 struct dev_printk_info *dev_info) 
723 { 724 ssize_t len; 725 726 len = msg_add_ext_text(buf, size, text, text_len, '\n'); 727 728 if (!dev_info) 729 goto out; 730 731 len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM", 732 dev_info->subsystem); 733 len += msg_add_dict_text(buf + len, size - len, "DEVICE", 734 dev_info->device); 735 out: 736 return len; 737 } 738 739 /* /dev/kmsg - userspace message inject/listen interface */ 740 struct devkmsg_user { 741 atomic64_t seq; 742 struct ratelimit_state rs; 743 struct mutex lock; 744 struct printk_buffers pbufs; 745 }; 746 747 static __printf(3, 4) __cold 748 int devkmsg_emit(int facility, int level, const char *fmt, ...) 749 { 750 va_list args; 751 int r; 752 753 va_start(args, fmt); 754 r = vprintk_emit(facility, level, NULL, fmt, args); 755 va_end(args); 756 757 return r; 758 } 759 760 static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) 761 { 762 char *buf, *line; 763 int level = default_message_loglevel; 764 int facility = 1; /* LOG_USER */ 765 struct file *file = iocb->ki_filp; 766 struct devkmsg_user *user = file->private_data; 767 size_t len = iov_iter_count(from); 768 ssize_t ret = len; 769 770 if (len > PRINTKRB_RECORD_MAX) 771 return -EINVAL; 772 773 /* Ignore when user logging is disabled. */ 774 if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) 775 return len; 776 777 /* Ratelimit when not explicitly enabled. */ 778 if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) { 779 if (!___ratelimit(&user->rs, current->comm)) 780 return ret; 781 } 782 783 buf = kmalloc(len+1, GFP_KERNEL); 784 if (buf == NULL) 785 return -ENOMEM; 786 787 buf[len] = '\0'; 788 if (!copy_from_iter_full(buf, len, from)) { 789 kfree(buf); 790 return -EFAULT; 791 } 792 793 /* 794 * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace 795 * the decimal value represents 32bit, the lower 3 bit are the log 796 * level, the rest are the log facility. 
797 * 798 * If no prefix or no userspace facility is specified, we 799 * enforce LOG_USER, to be able to reliably distinguish 800 * kernel-generated messages from userspace-injected ones. 801 */ 802 line = buf; 803 if (line[0] == '<') { 804 char *endp = NULL; 805 unsigned int u; 806 807 u = simple_strtoul(line + 1, &endp, 10); 808 if (endp && endp[0] == '>') { 809 level = LOG_LEVEL(u); 810 if (LOG_FACILITY(u) != 0) 811 facility = LOG_FACILITY(u); 812 endp++; 813 line = endp; 814 } 815 } 816 817 devkmsg_emit(facility, level, "%s", line); 818 kfree(buf); 819 return ret; 820 } 821 822 static ssize_t devkmsg_read(struct file *file, char __user *buf, 823 size_t count, loff_t *ppos) 824 { 825 struct devkmsg_user *user = file->private_data; 826 char *outbuf = &user->pbufs.outbuf[0]; 827 struct printk_message pmsg = { 828 .pbufs = &user->pbufs, 829 }; 830 ssize_t ret; 831 832 ret = mutex_lock_interruptible(&user->lock); 833 if (ret) 834 return ret; 835 836 if (!printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, false)) { 837 if (file->f_flags & O_NONBLOCK) { 838 ret = -EAGAIN; 839 goto out; 840 } 841 842 /* 843 * Guarantee this task is visible on the waitqueue before 844 * checking the wake condition. 845 * 846 * The full memory barrier within set_current_state() of 847 * prepare_to_wait_event() pairs with the full memory barrier 848 * within wq_has_sleeper(). 849 * 850 * This pairs with __wake_up_klogd:A. 
851 */ 852 ret = wait_event_interruptible(log_wait, 853 printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, 854 false)); /* LMM(devkmsg_read:A) */ 855 if (ret) 856 goto out; 857 } 858 859 if (pmsg.dropped) { 860 /* our last seen message is gone, return error and reset */ 861 atomic64_set(&user->seq, pmsg.seq); 862 ret = -EPIPE; 863 goto out; 864 } 865 866 atomic64_set(&user->seq, pmsg.seq + 1); 867 868 if (pmsg.outbuf_len > count) { 869 ret = -EINVAL; 870 goto out; 871 } 872 873 if (copy_to_user(buf, outbuf, pmsg.outbuf_len)) { 874 ret = -EFAULT; 875 goto out; 876 } 877 ret = pmsg.outbuf_len; 878 out: 879 mutex_unlock(&user->lock); 880 return ret; 881 } 882 883 /* 884 * Be careful when modifying this function!!! 885 * 886 * Only few operations are supported because the device works only with the 887 * entire variable length messages (records). Non-standard values are 888 * returned in the other cases and has been this way for quite some time. 889 * User space applications might depend on this behavior. 890 */ 891 static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) 892 { 893 struct devkmsg_user *user = file->private_data; 894 loff_t ret = 0; 895 896 if (offset) 897 return -ESPIPE; 898 899 switch (whence) { 900 case SEEK_SET: 901 /* the first record */ 902 atomic64_set(&user->seq, prb_first_valid_seq(prb)); 903 break; 904 case SEEK_DATA: 905 /* 906 * The first record after the last SYSLOG_ACTION_CLEAR, 907 * like issued by 'dmesg -c'. Reading /dev/kmsg itself 908 * changes no global state, and does not clear anything. 
909 */ 910 atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); 911 break; 912 case SEEK_END: 913 /* after the last record */ 914 atomic64_set(&user->seq, prb_next_seq(prb)); 915 break; 916 default: 917 ret = -EINVAL; 918 } 919 return ret; 920 } 921 922 static __poll_t devkmsg_poll(struct file *file, poll_table *wait) 923 { 924 struct devkmsg_user *user = file->private_data; 925 struct printk_info info; 926 __poll_t ret = 0; 927 928 poll_wait(file, &log_wait, wait); 929 930 if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { 931 /* return error when data has vanished underneath us */ 932 if (info.seq != atomic64_read(&user->seq)) 933 ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; 934 else 935 ret = EPOLLIN|EPOLLRDNORM; 936 } 937 938 return ret; 939 } 940 941 static int devkmsg_open(struct inode *inode, struct file *file) 942 { 943 struct devkmsg_user *user; 944 int err; 945 946 if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) 947 return -EPERM; 948 949 /* write-only does not need any file context */ 950 if ((file->f_flags & O_ACCMODE) != O_WRONLY) { 951 err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, 952 SYSLOG_FROM_READER); 953 if (err) 954 return err; 955 } 956 957 user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); 958 if (!user) 959 return -ENOMEM; 960 961 ratelimit_default_init(&user->rs); 962 ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE); 963 964 mutex_init(&user->lock); 965 966 atomic64_set(&user->seq, prb_first_valid_seq(prb)); 967 968 file->private_data = user; 969 return 0; 970 } 971 972 static int devkmsg_release(struct inode *inode, struct file *file) 973 { 974 struct devkmsg_user *user = file->private_data; 975 976 ratelimit_state_exit(&user->rs); 977 978 mutex_destroy(&user->lock); 979 kvfree(user); 980 return 0; 981 } 982 983 const struct file_operations kmsg_fops = { 984 .open = devkmsg_open, 985 .read = devkmsg_read, 986 .write_iter = devkmsg_write, 987 .llseek = devkmsg_llseek, 988 .poll = devkmsg_poll, 
989 .release = devkmsg_release, 990 }; 991 992 #ifdef CONFIG_VMCORE_INFO 993 /* 994 * This appends the listed symbols to /proc/vmcore 995 * 996 * /proc/vmcore is used by various utilities, like crash and makedumpfile to 997 * obtain access to symbols that are otherwise very difficult to locate. These 998 * symbols are specifically used so that utilities can access and extract the 999 * dmesg log from a vmcore file after a crash. 1000 */ 1001 void log_buf_vmcoreinfo_setup(void) 1002 { 1003 struct dev_printk_info *dev_info = NULL; 1004 1005 VMCOREINFO_SYMBOL(prb); 1006 VMCOREINFO_SYMBOL(printk_rb_static); 1007 VMCOREINFO_SYMBOL(clear_seq); 1008 1009 /* 1010 * Export struct size and field offsets. User space tools can 1011 * parse it and detect any changes to structure down the line. 1012 */ 1013 1014 VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); 1015 VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); 1016 VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); 1017 VMCOREINFO_OFFSET(printk_ringbuffer, fail); 1018 1019 VMCOREINFO_STRUCT_SIZE(prb_desc_ring); 1020 VMCOREINFO_OFFSET(prb_desc_ring, count_bits); 1021 VMCOREINFO_OFFSET(prb_desc_ring, descs); 1022 VMCOREINFO_OFFSET(prb_desc_ring, infos); 1023 VMCOREINFO_OFFSET(prb_desc_ring, head_id); 1024 VMCOREINFO_OFFSET(prb_desc_ring, tail_id); 1025 1026 VMCOREINFO_STRUCT_SIZE(prb_desc); 1027 VMCOREINFO_OFFSET(prb_desc, state_var); 1028 VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); 1029 1030 VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); 1031 VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); 1032 VMCOREINFO_OFFSET(prb_data_blk_lpos, next); 1033 1034 VMCOREINFO_STRUCT_SIZE(printk_info); 1035 VMCOREINFO_OFFSET(printk_info, seq); 1036 VMCOREINFO_OFFSET(printk_info, ts_nsec); 1037 VMCOREINFO_OFFSET(printk_info, text_len); 1038 VMCOREINFO_OFFSET(printk_info, caller_id); 1039 VMCOREINFO_OFFSET(printk_info, dev_info); 1040 1041 VMCOREINFO_STRUCT_SIZE(dev_printk_info); 1042 VMCOREINFO_OFFSET(dev_printk_info, subsystem); 1043 
VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem)); 1044 VMCOREINFO_OFFSET(dev_printk_info, device); 1045 VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device)); 1046 1047 VMCOREINFO_STRUCT_SIZE(prb_data_ring); 1048 VMCOREINFO_OFFSET(prb_data_ring, size_bits); 1049 VMCOREINFO_OFFSET(prb_data_ring, data); 1050 VMCOREINFO_OFFSET(prb_data_ring, head_lpos); 1051 VMCOREINFO_OFFSET(prb_data_ring, tail_lpos); 1052 1053 VMCOREINFO_SIZE(atomic_long_t); 1054 VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); 1055 1056 VMCOREINFO_STRUCT_SIZE(latched_seq); 1057 VMCOREINFO_OFFSET(latched_seq, val); 1058 } 1059 #endif 1060 1061 /* requested log_buf_len from kernel cmdline */ 1062 static unsigned long __initdata new_log_buf_len; 1063 1064 /* we practice scaling the ring buffer by powers of 2 */ 1065 static void __init log_buf_len_update(u64 size) 1066 { 1067 if (size > (u64)LOG_BUF_LEN_MAX) { 1068 size = (u64)LOG_BUF_LEN_MAX; 1069 pr_err("log_buf over 2G is not supported.\n"); 1070 } 1071 1072 if (size) 1073 size = roundup_pow_of_two(size); 1074 if (size > log_buf_len) 1075 new_log_buf_len = (unsigned long)size; 1076 } 1077 1078 /* save requested log_buf_len since it's too early to process it */ 1079 static int __init log_buf_len_setup(char *str) 1080 { 1081 u64 size; 1082 1083 if (!str) 1084 return -EINVAL; 1085 1086 size = memparse(str, &str); 1087 1088 log_buf_len_update(size); 1089 1090 return 0; 1091 } 1092 early_param("log_buf_len", log_buf_len_setup); 1093 1094 #ifdef CONFIG_SMP 1095 #define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT) 1096 1097 static void __init log_buf_add_cpu(void) 1098 { 1099 unsigned int cpu_extra; 1100 1101 /* 1102 * archs should set up cpu_possible_bits properly with 1103 * set_cpu_possible() after setup_arch() but just in 1104 * case lets ensure this is valid. 
1105 */ 1106 if (num_possible_cpus() == 1) 1107 return; 1108 1109 cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN; 1110 1111 /* by default this will only continue through for large > 64 CPUs */ 1112 if (cpu_extra <= __LOG_BUF_LEN / 2) 1113 return; 1114 1115 pr_info("log_buf_len individual max cpu contribution: %d bytes\n", 1116 __LOG_CPU_MAX_BUF_LEN); 1117 pr_info("log_buf_len total cpu_extra contributions: %d bytes\n", 1118 cpu_extra); 1119 pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN); 1120 1121 log_buf_len_update(cpu_extra + __LOG_BUF_LEN); 1122 } 1123 #else /* !CONFIG_SMP */ 1124 static inline void log_buf_add_cpu(void) {} 1125 #endif /* CONFIG_SMP */ 1126 1127 static void __init set_percpu_data_ready(void) 1128 { 1129 __printk_percpu_data_ready = true; 1130 } 1131 1132 static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, 1133 struct printk_record *r) 1134 { 1135 struct prb_reserved_entry e; 1136 struct printk_record dest_r; 1137 1138 prb_rec_init_wr(&dest_r, r->info->text_len); 1139 1140 if (!prb_reserve(&e, rb, &dest_r)) 1141 return 0; 1142 1143 memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); 1144 dest_r.info->text_len = r->info->text_len; 1145 dest_r.info->facility = r->info->facility; 1146 dest_r.info->level = r->info->level; 1147 dest_r.info->flags = r->info->flags; 1148 dest_r.info->ts_nsec = r->info->ts_nsec; 1149 dest_r.info->caller_id = r->info->caller_id; 1150 memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info)); 1151 1152 prb_final_commit(&e); 1153 1154 return prb_record_text_space(&e); 1155 } 1156 1157 static char setup_text_buf[PRINTKRB_RECORD_MAX] __initdata; 1158 1159 void __init setup_log_buf(int early) 1160 { 1161 struct printk_info *new_infos; 1162 unsigned int new_descs_count; 1163 struct prb_desc *new_descs; 1164 struct printk_info info; 1165 struct printk_record r; 1166 unsigned int text_size; 1167 size_t new_descs_size; 1168 size_t new_infos_size; 1169 
unsigned long flags; 1170 char *new_log_buf; 1171 unsigned int free; 1172 u64 seq; 1173 1174 /* 1175 * Some archs call setup_log_buf() multiple times - first is very 1176 * early, e.g. from setup_arch(), and second - when percpu_areas 1177 * are initialised. 1178 */ 1179 if (!early) 1180 set_percpu_data_ready(); 1181 1182 if (log_buf != __log_buf) 1183 return; 1184 1185 if (!early && !new_log_buf_len) 1186 log_buf_add_cpu(); 1187 1188 if (!new_log_buf_len) 1189 return; 1190 1191 new_descs_count = new_log_buf_len >> PRB_AVGBITS; 1192 if (new_descs_count == 0) { 1193 pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); 1194 return; 1195 } 1196 1197 new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); 1198 if (unlikely(!new_log_buf)) { 1199 pr_err("log_buf_len: %lu text bytes not available\n", 1200 new_log_buf_len); 1201 return; 1202 } 1203 1204 new_descs_size = new_descs_count * sizeof(struct prb_desc); 1205 new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); 1206 if (unlikely(!new_descs)) { 1207 pr_err("log_buf_len: %zu desc bytes not available\n", 1208 new_descs_size); 1209 goto err_free_log_buf; 1210 } 1211 1212 new_infos_size = new_descs_count * sizeof(struct printk_info); 1213 new_infos = memblock_alloc(new_infos_size, LOG_ALIGN); 1214 if (unlikely(!new_infos)) { 1215 pr_err("log_buf_len: %zu info bytes not available\n", 1216 new_infos_size); 1217 goto err_free_descs; 1218 } 1219 1220 prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf)); 1221 1222 prb_init(&printk_rb_dynamic, 1223 new_log_buf, ilog2(new_log_buf_len), 1224 new_descs, ilog2(new_descs_count), 1225 new_infos); 1226 1227 local_irq_save(flags); 1228 1229 log_buf_len = new_log_buf_len; 1230 log_buf = new_log_buf; 1231 new_log_buf_len = 0; 1232 1233 free = __LOG_BUF_LEN; 1234 prb_for_each_record(0, &printk_rb_static, seq, &r) { 1235 text_size = add_to_rb(&printk_rb_dynamic, &r); 1236 if (text_size > free) 1237 free = 0; 1238 else 1239 free -= text_size; 1240 } 1241 1242 
prb = &printk_rb_dynamic; 1243 1244 local_irq_restore(flags); 1245 1246 /* 1247 * Copy any remaining messages that might have appeared from 1248 * NMI context after copying but before switching to the 1249 * dynamic buffer. 1250 */ 1251 prb_for_each_record(seq, &printk_rb_static, seq, &r) { 1252 text_size = add_to_rb(&printk_rb_dynamic, &r); 1253 if (text_size > free) 1254 free = 0; 1255 else 1256 free -= text_size; 1257 } 1258 1259 if (seq != prb_next_seq(&printk_rb_static)) { 1260 pr_err("dropped %llu messages\n", 1261 prb_next_seq(&printk_rb_static) - seq); 1262 } 1263 1264 pr_info("log_buf_len: %u bytes\n", log_buf_len); 1265 pr_info("early log buf free: %u(%u%%)\n", 1266 free, (free * 100) / __LOG_BUF_LEN); 1267 return; 1268 1269 err_free_descs: 1270 memblock_free(new_descs, new_descs_size); 1271 err_free_log_buf: 1272 memblock_free(new_log_buf, new_log_buf_len); 1273 } 1274 1275 static bool __read_mostly ignore_loglevel; 1276 1277 static int __init ignore_loglevel_setup(char *str) 1278 { 1279 ignore_loglevel = true; 1280 pr_info("debug: ignoring loglevel setting.\n"); 1281 1282 return 0; 1283 } 1284 1285 early_param("ignore_loglevel", ignore_loglevel_setup); 1286 module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); 1287 MODULE_PARM_DESC(ignore_loglevel, 1288 "ignore loglevel setting (prints all kernel messages to the console)"); 1289 1290 static bool suppress_message_printing(int level) 1291 { 1292 return (level >= console_loglevel && !ignore_loglevel); 1293 } 1294 1295 #ifdef CONFIG_BOOT_PRINTK_DELAY 1296 1297 static int boot_delay; /* msecs delay after each printk during bootup */ 1298 static unsigned long long loops_per_msec; /* based on boot_delay */ 1299 1300 static int __init boot_delay_setup(char *str) 1301 { 1302 unsigned long lpj; 1303 1304 lpj = preset_lpj ? 
preset_lpj : 1000000; /* some guess */ 1305 loops_per_msec = (unsigned long long)lpj / 1000 * HZ; 1306 1307 get_option(&str, &boot_delay); 1308 if (boot_delay > 10 * 1000) 1309 boot_delay = 0; 1310 1311 pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, " 1312 "HZ: %d, loops_per_msec: %llu\n", 1313 boot_delay, preset_lpj, lpj, HZ, loops_per_msec); 1314 return 0; 1315 } 1316 early_param("boot_delay", boot_delay_setup); 1317 1318 static void boot_delay_msec(int level) 1319 { 1320 unsigned long long k; 1321 unsigned long timeout; 1322 1323 if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) 1324 || suppress_message_printing(level)) { 1325 return; 1326 } 1327 1328 k = (unsigned long long)loops_per_msec * boot_delay; 1329 1330 timeout = jiffies + msecs_to_jiffies(boot_delay); 1331 while (k) { 1332 k--; 1333 cpu_relax(); 1334 /* 1335 * use (volatile) jiffies to prevent 1336 * compiler reduction; loop termination via jiffies 1337 * is secondary and may or may not happen. 1338 */ 1339 if (time_after(jiffies, timeout)) 1340 break; 1341 touch_nmi_watchdog(); 1342 } 1343 } 1344 #else 1345 static inline void boot_delay_msec(int level) 1346 { 1347 } 1348 #endif 1349 1350 static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); 1351 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); 1352 1353 static size_t print_syslog(unsigned int level, char *buf) 1354 { 1355 return sprintf(buf, "<%u>", level); 1356 } 1357 1358 static size_t print_time(u64 ts, char *buf) 1359 { 1360 unsigned long rem_nsec = do_div(ts, 1000000000); 1361 1362 return sprintf(buf, "[%5lu.%06lu]", 1363 (unsigned long)ts, rem_nsec / 1000); 1364 } 1365 1366 #ifdef CONFIG_PRINTK_CALLER 1367 static size_t print_caller(u32 id, char *buf) 1368 { 1369 char caller[12]; 1370 1371 snprintf(caller, sizeof(caller), "%c%u", 1372 id & 0x80000000 ? 
'C' : 'T', id & ~0x80000000); 1373 return sprintf(buf, "[%6s]", caller); 1374 } 1375 #else 1376 #define print_caller(id, buf) 0 1377 #endif 1378 1379 static size_t info_print_prefix(const struct printk_info *info, bool syslog, 1380 bool time, char *buf) 1381 { 1382 size_t len = 0; 1383 1384 if (syslog) 1385 len = print_syslog((info->facility << 3) | info->level, buf); 1386 1387 if (time) 1388 len += print_time(info->ts_nsec, buf + len); 1389 1390 len += print_caller(info->caller_id, buf + len); 1391 1392 if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) { 1393 buf[len++] = ' '; 1394 buf[len] = '\0'; 1395 } 1396 1397 return len; 1398 } 1399 1400 /* 1401 * Prepare the record for printing. The text is shifted within the given 1402 * buffer to avoid a need for another one. The following operations are 1403 * done: 1404 * 1405 * - Add prefix for each line. 1406 * - Drop truncated lines that no longer fit into the buffer. 1407 * - Add the trailing newline that has been removed in vprintk_store(). 1408 * - Add a string terminator. 1409 * 1410 * Since the produced string is always terminated, the maximum possible 1411 * return value is @r->text_buf_size - 1; 1412 * 1413 * Return: The length of the updated/prepared text, including the added 1414 * prefixes and the newline. The terminator is not counted. The dropped 1415 * line(s) are not counted. 1416 */ 1417 static size_t record_print_text(struct printk_record *r, bool syslog, 1418 bool time) 1419 { 1420 size_t text_len = r->info->text_len; 1421 size_t buf_size = r->text_buf_size; 1422 char *text = r->text_buf; 1423 char prefix[PRINTK_PREFIX_MAX]; 1424 bool truncated = false; 1425 size_t prefix_len; 1426 size_t line_len; 1427 size_t len = 0; 1428 char *next; 1429 1430 /* 1431 * If the message was truncated because the buffer was not large 1432 * enough, treat the available text as if it were the full text. 
	 */
	if (text_len > buf_size)
		text_len = buf_size;

	prefix_len = info_print_prefix(r->info, syslog, time, prefix);

	/*
	 * @text_len: bytes of unprocessed text
	 * @line_len: bytes of current line _without_ newline
	 * @text:     pointer to beginning of current line
	 * @len:      number of bytes prepared in r->text_buf
	 */
	for (;;) {
		next = memchr(text, '\n', text_len);
		if (next) {
			line_len = next - text;
		} else {
			/* Drop truncated line(s). */
			if (truncated)
				break;
			line_len = text_len;
		}

		/*
		 * Truncate the text if there is not enough space to add the
		 * prefix and a trailing newline and a terminator.
		 */
		if (len + prefix_len + text_len + 1 + 1 > buf_size) {
			/* Drop even the current line if no space. */
			if (len + prefix_len + line_len + 1 + 1 > buf_size)
				break;

			text_len = buf_size - len - prefix_len - 1 - 1;
			truncated = true;
		}

		/*
		 * Make room for the prefix by shifting all remaining text;
		 * memmove() is needed because source and destination overlap.
		 */
		memmove(text + prefix_len, text, text_len);
		memcpy(text, prefix, prefix_len);

		/*
		 * Increment the prepared length to include the text and
		 * prefix that were just moved+copied. Also increment for the
		 * newline at the end of this line. If this is the last line,
		 * there is no newline, but it will be added immediately below.
		 */
		len += prefix_len + line_len + 1;
		if (text_len == line_len) {
			/*
			 * This is the last line. Add the trailing newline
			 * removed in vprintk_store().
			 */
			text[prefix_len + line_len] = '\n';
			break;
		}

		/*
		 * Advance beyond the added prefix and the related line with
		 * its newline.
		 */
		text += prefix_len + line_len + 1;

		/*
		 * The remaining text has only decreased by the line with its
		 * newline.
		 *
		 * Note that @text_len can become zero. It happens when @text
		 * ended with a newline (either due to truncation or the
		 * original string ending with "\n\n"). The loop is correctly
		 * repeated and (if not truncated) an empty line with a prefix
		 * will be prepared.
		 */
		text_len -= line_len + 1;
	}

	/*
	 * If a buffer was provided, it will be terminated. Space for the
	 * string terminator is guaranteed to be available. The terminator is
	 * not counted in the return value.
	 */
	if (buf_size > 0)
		r->text_buf[len] = 0;

	return len;
}

/*
 * Compute the size of a record as it would be produced by
 * record_print_text(), without actually formatting the text.
 *
 * @info:       meta data of the record
 * @line_count: number of lines in the record text
 * @syslog:     whether the syslog prefix is included
 * @time:       whether the timestamp is included
 *
 * Return: @line_count prefixes plus the text length plus one byte for the
 * trailing newline.
 */
static size_t get_record_print_text_size(struct printk_info *info,
					 unsigned int line_count,
					 bool syslog, bool time)
{
	char prefix[PRINTK_PREFIX_MAX];
	size_t prefix_len;

	prefix_len = info_print_prefix(info, syslog, time, prefix);

	/*
	 * Each line will be preceded with a prefix. The intermediate
	 * newlines are already within the text, but a final trailing
	 * newline will be added.
	 */
	return ((prefix_len * line_count) + info->text_len + 1);
}

/*
 * Beginning with @start_seq, find the first record where it and all following
 * records up to (but not including) @max_seq fit into @size.
 *
 * @max_seq is simply an upper bound and does not need to exist. If the caller
 * does not require an upper bound, -1 can be used for @max_seq.
 */
static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
				  bool syslog, bool time)
{
	struct printk_info info;
	unsigned int line_count;
	size_t len = 0;
	u64 seq;

	/* Determine the size of the records up to @max_seq.
	 */
	prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
		if (info.seq >= max_seq)
			break;
		len += get_record_print_text_size(&info, line_count, syslog, time);
	}

	/*
	 * Adjust the upper bound for the next loop to avoid subtracting
	 * lengths that were never added.
	 */
	if (seq < max_seq)
		max_seq = seq;

	/*
	 * Move first record forward until length fits into the buffer. Ignore
	 * newest messages that were not counted in the above cycle. Messages
	 * might appear and get lost in the meantime. This is a best effort
	 * that prevents an infinite loop that could occur with a retry.
	 */
	prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
		if (len <= size || info.seq >= max_seq)
			break;
		len -= get_record_print_text_size(&info, line_count, syslog, time);
	}

	return seq;
}

/*
 * Copy formatted records, starting at @syslog_seq, to the user buffer
 * @buf of @size bytes. Waits (interruptibly) until a record is available.
 *
 * The caller is responsible for making sure @size is greater than 0.
 *
 * Return: the number of bytes copied, -ENOMEM when the temporary buffer
 * cannot be allocated, -EFAULT when nothing could be copied to user space,
 * or the non-zero result of the interrupted wait.
 */
static int syslog_print(char __user *buf, int size)
{
	struct printk_info info;
	struct printk_record r;
	char *text;
	int len = 0;
	u64 seq;

	text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
	if (!text)
		return -ENOMEM;

	prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);

	mutex_lock(&syslog_lock);

	/*
	 * Wait for the @syslog_seq record to be available. @syslog_seq may
	 * change while waiting.
	 */
	do {
		seq = syslog_seq;

		mutex_unlock(&syslog_lock);
		/*
		 * Guarantee this task is visible on the waitqueue before
		 * checking the wake condition.
		 *
		 * The full memory barrier within set_current_state() of
		 * prepare_to_wait_event() pairs with the full memory barrier
		 * within wq_has_sleeper().
		 *
		 * This pairs with __wake_up_klogd:A.
		 */
		len = wait_event_interruptible(log_wait,
				prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */
		mutex_lock(&syslog_lock);

		if (len)
			goto out;
	} while (syslog_seq != seq);

	/*
	 * Copy records that fit into the buffer. The above cycle makes sure
	 * that the first record is always available.
	 */
	do {
		size_t n;
		size_t skip;
		int err;

		if (!prb_read_valid(prb, syslog_seq, &r))
			break;

		if (r.info->seq != syslog_seq) {
			/* message is gone, move to next valid one */
			syslog_seq = r.info->seq;
			syslog_partial = 0;
		}

		/*
		 * To keep reading/counting partial line consistent,
		 * use printk_time value as of the beginning of a line.
		 */
		if (!syslog_partial)
			syslog_time = printk_time;

		/* Bytes of this record already handed out by an earlier read. */
		skip = syslog_partial;
		n = record_print_text(&r, true, syslog_time);
		if (n - syslog_partial <= size) {
			/* message fits into buffer, move forward */
			syslog_seq = r.info->seq + 1;
			n -= syslog_partial;
			syslog_partial = 0;
		} else if (!len){
			/* partial read(), remember position */
			n = size;
			syslog_partial += n;
		} else
			n = 0;

		if (!n)
			break;

		/* The lock is dropped while copying to user space. */
		mutex_unlock(&syslog_lock);
		err = copy_to_user(buf, text + skip, n);
		mutex_lock(&syslog_lock);

		if (err) {
			/* Report the fault only if nothing was copied yet. */
			if (!len)
				len = -EFAULT;
			break;
		}

		len += n;
		size -= n;
		buf += n;
	} while (size);
out:
	mutex_unlock(&syslog_lock);
	kfree(text);
	return len;
}

/*
 * Copy the newest formatted records that fit into @size bytes to the user
 * buffer @buf. When @clear is set, @clear_seq is updated afterwards.
 */
static int syslog_print_all(char __user *buf, int size, bool clear)
{
	struct printk_info info;
	struct printk_record r;
	char *text;
	int len = 0;
	u64 seq;
	bool time;

	text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
	if (!text)
		return -ENOMEM;

	time = printk_time;
	/*
	 * Find first record that fits, including all
	 * following records,
	 * into the user-provided buffer for this dump.
	 */
	seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
				     size, true, time);

	prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);

	prb_for_each_record(seq, prb, seq, &r) {
		int textlen;

		textlen = record_print_text(&r, true, time);

		if (len + textlen > size) {
			/* This record was not copied; step back to it. */
			seq--;
			break;
		}

		if (copy_to_user(buf + len, text, textlen))
			len = -EFAULT;
		else
			len += textlen;

		if (len < 0)
			break;
	}

	if (clear) {
		mutex_lock(&syslog_lock);
		latched_seq_write(&clear_seq, seq);
		mutex_unlock(&syslog_lock);
	}

	kfree(text);
	return len;
}

/* Set @clear_seq to the next sequence number of the ringbuffer. */
static void syslog_clear(void)
{
	mutex_lock(&syslog_lock);
	latched_seq_write(&clear_seq, prb_next_seq(prb));
	mutex_unlock(&syslog_lock);
}

/*
 * Execute the syslog action @type.
 *
 * @type:   one of the SYSLOG_ACTION_* commands
 * @buf:    user buffer for the read actions
 * @len:    buffer length for the read actions, or the new level for
 *          SYSLOG_ACTION_CONSOLE_LEVEL
 * @source: origin of the request, passed to check_syslog_permissions()
 *
 * Return: an action specific non-negative value on success, otherwise a
 * negative error code (-EINVAL for bad arguments or an unknown @type,
 * -EFAULT for an inaccessible buffer, or the error from the permission
 * check or the read helpers).
 */
int do_syslog(int type, char __user *buf, int len, int source)
{
	struct printk_info info;
	bool clear = false;
	static int saved_console_loglevel = LOGLEVEL_DEFAULT;
	int error;

	error = check_syslog_permissions(type, source);
	if (error)
		return error;

	switch (type) {
	case SYSLOG_ACTION_CLOSE:	/* Close log */
		break;
	case SYSLOG_ACTION_OPEN:	/* Open log */
		break;
	case SYSLOG_ACTION_READ:	/* Read from log */
		if (!buf || len < 0)
			return -EINVAL;
		if (!len)
			return 0;
		if (!access_ok(buf, len))
			return -EFAULT;
		error = syslog_print(buf, len);
		break;
	/* Read/clear last kernel messages */
	case SYSLOG_ACTION_READ_CLEAR:
		clear = true;
		fallthrough;
	/* Read last kernel messages */
	case SYSLOG_ACTION_READ_ALL:
		if (!buf || len < 0)
			return -EINVAL;
		if (!len)
			return 0;
		if (!access_ok(buf, len))
			return -EFAULT;
		error = syslog_print_all(buf, len, clear);
		break;
	/* Clear ring buffer */
	case SYSLOG_ACTION_CLEAR:
		syslog_clear();
		break;
	/* Disable logging to console */
	case SYSLOG_ACTION_CONSOLE_OFF:
		if (saved_console_loglevel == LOGLEVEL_DEFAULT)
			saved_console_loglevel = console_loglevel;
		console_loglevel = minimum_console_loglevel;
		break;
	/* Enable logging to console */
	case SYSLOG_ACTION_CONSOLE_ON:
		if (saved_console_loglevel != LOGLEVEL_DEFAULT) {
			console_loglevel = saved_console_loglevel;
			saved_console_loglevel = LOGLEVEL_DEFAULT;
		}
		break;
	/* Set level of messages printed to console */
	case SYSLOG_ACTION_CONSOLE_LEVEL:
		if (len < 1 || len > 8)
			return -EINVAL;
		if (len < minimum_console_loglevel)
			len = minimum_console_loglevel;
		console_loglevel = len;
		/* Implicitly re-enable logging to console */
		saved_console_loglevel = LOGLEVEL_DEFAULT;
		break;
	/* Number of chars in the log buffer */
	case SYSLOG_ACTION_SIZE_UNREAD:
		mutex_lock(&syslog_lock);
		if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
			/* No unread messages. */
			mutex_unlock(&syslog_lock);
			return 0;
		}
		if (info.seq != syslog_seq) {
			/* messages are gone, move to first one */
			syslog_seq = info.seq;
			syslog_partial = 0;
		}
		if (source == SYSLOG_FROM_PROC) {
			/*
			 * Short-cut for poll("/proc/kmsg") which simply checks
			 * for pending data, not the size; return the count of
			 * records, not the length.
			 */
			error = prb_next_seq(prb) - syslog_seq;
		} else {
			bool time = syslog_partial ? syslog_time : printk_time;
			unsigned int line_count;
			u64 seq;

			/* @error is 0 here, so it accumulates the total size. */
			prb_for_each_info(syslog_seq, prb, seq, &info,
					  &line_count) {
				error += get_record_print_text_size(&info, line_count,
								    true, time);
				time = printk_time;
			}
			error -= syslog_partial;
		}
		mutex_unlock(&syslog_lock);
		break;
	/* Size of the log buffer */
	case SYSLOG_ACTION_SIZE_BUFFER:
		error = log_buf_len;
		break;
	default:
		error = -EINVAL;
		break;
	}

	return error;
}

/* syslog(2) entry point: forwards to do_syslog() with SYSLOG_FROM_READER. */
SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
{
	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
}

/*
 * Special console_lock variants that help to reduce the risk of soft-lockups.
 * They allow to pass console_lock to another printk() call using a busy wait.
 */

#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_owner_dep_map = {
	.name = "console_owner"
};
#endif

/* @console_owner_lock protects updates of @console_owner and @console_waiter. */
static DEFINE_RAW_SPINLOCK(console_owner_lock);
static struct task_struct *console_owner;
static bool console_waiter;

/**
 * console_lock_spinning_enable - mark beginning of code where another
 *	thread might safely busy wait
 *
 * This basically converts console_lock into a spinlock. This marks
 * the section where the console_lock owner can not sleep, because
 * there may be a waiter spinning (like a spinlock). Also it must be
 * ready to hand over the lock at the end of the section.
 */
void console_lock_spinning_enable(void)
{
	/*
	 * Do not use spinning in panic(). The panic CPU wants to keep the lock.
	 * Non-panic CPUs abandon the flush anyway.
	 *
	 * Just keep the lockdep annotation. The panic-CPU should avoid
	 * taking console_owner_lock because it might cause a deadlock.
	 * This looks like the easiest way how to prevent false lockdep
	 * reports without handling races a lockless way.
	 */
	if (panic_in_progress())
		goto lockdep;

	raw_spin_lock(&console_owner_lock);
	console_owner = current;
	raw_spin_unlock(&console_owner_lock);

lockdep:
	/* The waiter may spin on us after setting console_owner */
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
}

/**
 * console_lock_spinning_disable_and_check - mark end of code where another
 *	thread was able to busy wait and check if there is a waiter
 * @cookie: cookie returned from console_srcu_read_lock()
 *
 * This is called at the end of the section where spinning is allowed.
 * It has two functions. First, it is a signal that it is no longer
 * safe to start busy waiting for the lock. Second, it checks if
 * there is a busy waiter and passes the lock rights to her.
 *
 * Important: Callers lose both the console_lock and the SRCU read lock if
 *	there was a busy waiter. They must not touch items synchronized by
 *	console_lock or SRCU read lock in this case.
 *
 * Return: 1 if the lock rights were passed, 0 otherwise.
 */
int console_lock_spinning_disable_and_check(int cookie)
{
	int waiter;

	/*
	 * Ignore spinning waiters during panic() because they might get stopped
	 * or blocked at any time,
	 *
	 * It is safe because nobody is allowed to start spinning during panic
	 * in the first place. If there has been a waiter then non panic CPUs
	 * might stay spinning. They would get stopped anyway. The panic context
	 * will never start spinning and an interrupted spin on panic CPU will
	 * never continue.
	 */
	if (panic_in_progress()) {
		/* Keep lockdep happy. */
		spin_release(&console_owner_dep_map, _THIS_IP_);
		return 0;
	}

	/* Sample the waiter flag and drop ownership under the same lock. */
	raw_spin_lock(&console_owner_lock);
	waiter = READ_ONCE(console_waiter);
	console_owner = NULL;
	raw_spin_unlock(&console_owner_lock);

	if (!waiter) {
		spin_release(&console_owner_dep_map, _THIS_IP_);
		return 0;
	}

	/* The waiter is now free to continue */
	WRITE_ONCE(console_waiter, false);

	spin_release(&console_owner_dep_map, _THIS_IP_);

	/*
	 * Preserve lockdep lock ordering. Release the SRCU read lock before
	 * releasing the console_lock.
	 */
	console_srcu_read_unlock(cookie);

	/*
	 * Hand off console_lock to waiter. The waiter will perform
	 * the up(). After this, the waiter is the console_lock owner.
	 */
	mutex_release(&console_lock_dep_map, _THIS_IP_);
	return 1;
}

/**
 * console_trylock_spinning - try to get console_lock by busy waiting
 *
 * This allows to busy wait for the console_lock when the current
 * owner is running in specially marked sections. It means that
 * the current owner is running and cannot reschedule until it
 * is ready to lose the lock.
 *
 * Return: 1 if we got the lock, 0 otherwise
 */
static int console_trylock_spinning(void)
{
	struct task_struct *owner = NULL;
	bool waiter;
	bool spin = false;
	unsigned long flags;

	if (console_trylock())
		return 1;

	/*
	 * It's unsafe to spin once a panic has begun. If we are the
	 * panic CPU, we may have already halted the owner of the
	 * console_sem. If we are not the panic CPU, then we should
	 * avoid taking console_sem, so the panic CPU has a better
	 * chance of cleanly acquiring it later.
	 */
	if (panic_in_progress())
		return 0;

	printk_safe_enter_irqsave(flags);

	/* Register as the single waiter if there is another active owner. */
	raw_spin_lock(&console_owner_lock);
	owner = READ_ONCE(console_owner);
	waiter = READ_ONCE(console_waiter);
	if (!waiter && owner && owner != current) {
		WRITE_ONCE(console_waiter, true);
		spin = true;
	}
	raw_spin_unlock(&console_owner_lock);

	/*
	 * If there is an active printk() writing to the
	 * consoles, instead of having it write our data too,
	 * see if we can offload that load from the active
	 * printer, and do some printing ourselves.
	 * Go into a spin only if there isn't already a waiter
	 * spinning, and there is an active printer, and
	 * that active printer isn't us (recursive printk?).
	 */
	if (!spin) {
		printk_safe_exit_irqrestore(flags);
		return 0;
	}

	/* We spin waiting for the owner to release us */
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
	/* Owner will clear console_waiter on hand off */
	while (READ_ONCE(console_waiter))
		cpu_relax();
	spin_release(&console_owner_dep_map, _THIS_IP_);

	printk_safe_exit_irqrestore(flags);
	/*
	 * The owner passed the console lock to us.
	 * Since we did not spin on console lock, annotate
	 * this as a trylock. Otherwise lockdep will
	 * complain.
	 */
	mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);

	/*
	 * Update @console_may_schedule for trylock because the previous
	 * owner may have been schedulable.
	 */
	console_may_schedule = 0;

	return 1;
}

/*
 * Recursion is tracked separately on each CPU. If NMIs are supported, an
 * additional NMI context per CPU is also separately tracked. Until per-CPU
 * is available, a separate "early tracking" is performed.
 */
static DEFINE_PER_CPU(u8, printk_count);
static u8 printk_count_early;
#ifdef CONFIG_HAVE_NMI
static DEFINE_PER_CPU(u8, printk_count_nmi);
static u8 printk_count_nmi_early;
#endif

/*
 * Recursion is limited to keep the output sane. printk() should not require
 * more than 1 level of recursion (allowing, for example, printk() to trigger
 * a WARN), but a higher value is used in case some printk-internal errors
 * exist, such as the ringbuffer validation checks failing.
 */
#define PRINTK_MAX_RECURSION 3

/*
 * Return a pointer to the dedicated counter for the CPU+context of the
 * caller.
 *
 * The per-CPU counters are used once printk_percpu_data_ready() reports
 * true; before that the single "early" counters are returned.
 */
static u8 *__printk_recursion_counter(void)
{
#ifdef CONFIG_HAVE_NMI
	if (in_nmi()) {
		if (printk_percpu_data_ready())
			return this_cpu_ptr(&printk_count_nmi);
		return &printk_count_nmi_early;
	}
#endif
	if (printk_percpu_data_ready())
		return this_cpu_ptr(&printk_count);
	return &printk_count_early;
}

/*
 * Enter recursion tracking. Interrupts are disabled to simplify tracking.
 * The caller must check the boolean return value to see if the recursion is
 * allowed. On failure, interrupts are not disabled.
 *
 * @recursion_ptr must be a variable of type (u8 *) and is the same variable
 * that is passed to printk_exit_irqrestore().
 *
 * Entry is refused only when the counter is already greater than
 * PRINTK_MAX_RECURSION; otherwise the counter is incremented.
 */
#define printk_enter_irqsave(recursion_ptr, flags)	\
({							\
	bool success = true;				\
							\
	typecheck(u8 *, recursion_ptr);			\
	local_irq_save(flags);				\
	(recursion_ptr) = __printk_recursion_counter();	\
	if (*(recursion_ptr) > PRINTK_MAX_RECURSION) {	\
		local_irq_restore(flags);		\
		success = false;			\
	} else {					\
		(*(recursion_ptr))++;			\
	}						\
	success;					\
})

/* Exit recursion tracking, restoring interrupts.
 */
#define printk_exit_irqrestore(recursion_ptr, flags)	\
	do {						\
		typecheck(u8 *, recursion_ptr);		\
		(*(recursion_ptr))--;			\
		local_irq_restore(flags);		\
	} while (0)

int printk_delay_msec __read_mostly;

/*
 * Apply the boot delay and, when @printk_delay_msec is non-zero, busy-wait
 * for that many milliseconds while keeping the NMI watchdog quiet.
 */
static inline void printk_delay(int level)
{
	boot_delay_msec(level);

	if (unlikely(printk_delay_msec)) {
		int m = printk_delay_msec;

		while (m--) {
			mdelay(1);
			touch_nmi_watchdog();
		}
	}
}

/*
 * Return the caller id: the task pid in task context, otherwise the CPU
 * number with the top bit (0x80000000) set.
 */
static inline u32 printk_caller_id(void)
{
	return in_task() ? task_pid_nr(current) :
		0x80000000 + smp_processor_id();
}

/**
 * printk_parse_prefix - Parse level and control flags.
 *
 * @text:     The terminated text message.
 * @level:    A pointer to the current level value, will be updated.
 * @flags:    A pointer to the current printk_info flags, will be updated.
 *
 * @level may be NULL if the caller is not interested in the parsed value.
 * Otherwise the variable pointed to by @level must be set to
 * LOGLEVEL_DEFAULT in order to be updated with the parsed value.
 *
 * @flags may be NULL if the caller is not interested in the parsed value.
 * Otherwise the variable pointed to by @flags will be OR'd with the parsed
 * value.
 *
 * Return: The length of the parsed level and control flags.
 */
u16 printk_parse_prefix(const char *text, int *level,
			enum printk_info_flags *flags)
{
	u16 prefix_len = 0;
	int kern_level;

	while (*text) {
		kern_level = printk_get_level(text);
		if (!kern_level)
			break;

		switch (kern_level) {
		case '0' ... '7':
			if (level && *level == LOGLEVEL_DEFAULT)
				*level = kern_level - '0';
			break;
		case 'c':	/* KERN_CONT */
			if (flags)
				*flags |= LOG_CONT;
		}

		/* Each recognized prefix occupies two bytes of @text. */
		prefix_len += 2;
		text += 2;
	}

	return prefix_len;
}

/*
 * Format the message into @text (at most @size bytes), strip a trailing
 * newline (recording it as LOG_NEWLINE in @flags) and, for facility 0,
 * strip the leading level/control prefix.
 *
 * Return: the length of the resulting text.
 */
__printf(5, 0)
static u16 printk_sprint(char *text, u16 size, int facility,
			 enum printk_info_flags *flags, const char *fmt,
			 va_list args)
{
	u16 text_len;

	text_len = vscnprintf(text, size, fmt, args);

	/* Mark and strip a trailing newline. */
	if (text_len && text[text_len - 1] == '\n') {
		text_len--;
		*flags |= LOG_NEWLINE;
	}

	/* Strip log level and control flags. */
	if (facility == 0) {
		u16 prefix_len;

		prefix_len = printk_parse_prefix(text, NULL, NULL);
		if (prefix_len) {
			text_len -= prefix_len;
			memmove(text, text + prefix_len, text_len);
		}
	}

	trace_console(text, text_len);

	return text_len;
}

/*
 * Format a message and store it as a record in the ringbuffer @prb.
 *
 * A message carrying LOG_CONT is first tried to be appended to the last
 * record of the same caller; otherwise (or when that fails) a new record
 * is reserved, with a truncated retry when the reservation fails.
 *
 * Return: the stored text length, or 0 when the recursion limit was hit or
 * no space could be reserved.
 */
__printf(4, 0)
int vprintk_store(int facility, int level,
		  const struct dev_printk_info *dev_info,
		  const char *fmt, va_list args)
{
	struct prb_reserved_entry e;
	enum printk_info_flags flags = 0;
	struct printk_record r;
	unsigned long irqflags;
	u16 trunc_msg_len = 0;
	char prefix_buf[8];
	u8 *recursion_ptr;
	u16 reserve_size;
	va_list args2;
	u32 caller_id;
	u16 text_len;
	int ret = 0;
	u64 ts_nsec;

	if (!printk_enter_irqsave(recursion_ptr, irqflags))
		return 0;

	/*
	 * Since the duration of printk() can vary depending on the message
	 * and state of the ringbuffer, grab the timestamp now so that it is
	 * close to the call of printk(). This provides a more deterministic
	 * timestamp with respect to the caller.
	 */
	ts_nsec = local_clock();

	caller_id = printk_caller_id();

	/*
	 * The sprintf needs to come first since the syslog prefix might be
	 * passed in as a parameter. An extra byte must be reserved so that
	 * later the vscnprintf() into the reserved buffer has room for the
	 * terminating '\0', which is not counted by vsnprintf().
	 */
	va_copy(args2, args);
	reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1;
	va_end(args2);

	if (reserve_size > PRINTKRB_RECORD_MAX)
		reserve_size = PRINTKRB_RECORD_MAX;

	/* Extract log level or control flags. */
	if (facility == 0)
		printk_parse_prefix(&prefix_buf[0], &level, &flags);

	if (level == LOGLEVEL_DEFAULT)
		level = default_message_loglevel;

	if (dev_info)
		flags |= LOG_NEWLINE;

	if (flags & LOG_CONT) {
		prb_rec_init_wr(&r, reserve_size);
		if (prb_reserve_in_last(&e, prb, &r, caller_id, PRINTKRB_RECORD_MAX)) {
			text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
						 facility, &flags, fmt, args);
			r.info->text_len += text_len;

			if (flags & LOG_NEWLINE) {
				r.info->flags |= LOG_NEWLINE;
				prb_final_commit(&e);
			} else {
				prb_commit(&e);
			}

			ret = text_len;
			goto out;
		}
	}

	/*
	 * Explicitly initialize the record before every prb_reserve() call.
	 * prb_reserve_in_last() and prb_reserve() purposely invalidate the
	 * structure when they fail.
	 */
	prb_rec_init_wr(&r, reserve_size);
	if (!prb_reserve(&e, prb, &r)) {
		/* truncate the message if it is too long for empty buffer */
		truncate_msg(&reserve_size, &trunc_msg_len);

		prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
		if (!prb_reserve(&e, prb, &r))
			goto out;
	}

	/* fill message */
	text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
	if (trunc_msg_len)
		memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len);
	r.info->text_len = text_len + trunc_msg_len;
	r.info->facility = facility;
	r.info->level = level & 7;
	r.info->flags = flags & 0x1f;
	r.info->ts_nsec = ts_nsec;
	r.info->caller_id = caller_id;
	if (dev_info)
		memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));

	/* A message without a trailing newline can be continued. */
	if (!(flags & LOG_NEWLINE))
		prb_commit(&e);
	else
		prb_final_commit(&e);

	ret = text_len + trunc_msg_len;
out:
	printk_exit_irqrestore(recursion_ptr, irqflags);
	return ret;
}

/*
 * This acts as a one-way switch to allow legacy consoles to print from
 * the printk() caller context on a panic CPU. It also attempts to flush
 * the legacy consoles in this context.
 */
void printk_legacy_allow_panic_sync(void)
{
	struct console_flush_type ft;

	legacy_allow_panic_sync = true;

	/* Flush right away, but only if nobody else holds the console lock. */
	printk_get_console_flush_type(&ft);
	if (ft.legacy_direct) {
		if (console_trylock())
			console_unlock();
	}
}

/*
 * Store a message via vprintk_store() and then trigger console output
 * according to the flush types reported by printk_get_console_flush_type().
 *
 * @facility, @level, @dev_info, @fmt, @args are handed to vprintk_store().
 * A @level of LOGLEVEL_SCHED is stored as LOGLEVEL_DEFAULT and turns direct
 * legacy printing into deferred (offloaded) printing.
 *
 * Returns the value of vprintk_store(), or 0 without storing anything when
 * @suppress_printk is set or when another CPU is in panic and
 * @panic_triggering_all_cpu_backtrace is not set.
 */
asmlinkage int vprintk_emit(int facility, int level,
			    const struct dev_printk_info *dev_info,
			    const char *fmt, va_list args)
{
	struct console_flush_type ft;
	int printed_len;

	/* Suppress unimportant messages after panic happens */
	if (unlikely(suppress_printk))
		return 0;

	/*
	 * The messages on the panic CPU are the most important. If
	 * non-panic CPUs are generating any messages, they will be
	 * silently dropped.
	 */
	if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace)
		return 0;

	printk_get_console_flush_type(&ft);

	/* If called from the scheduler, we can not call up(). */
	if (level == LOGLEVEL_SCHED) {
		level = LOGLEVEL_DEFAULT;
		ft.legacy_offload |= ft.legacy_direct;
		ft.legacy_direct = false;
	}

	printk_delay(level);

	printed_len = vprintk_store(facility, level, dev_info, fmt, args);

	if (ft.nbcon_atomic)
		nbcon_atomic_flush_pending();

	if (ft.nbcon_offload)
		nbcon_kthreads_wake();

	if (ft.legacy_direct) {
		/*
		 * The caller may be holding system-critical or
		 * timing-sensitive locks. Disable preemption during
		 * printing of all remaining records to all consoles so that
		 * this context can return as soon as possible. Hopefully
		 * another printk() caller will take over the printing.
		 */
		preempt_disable();
		/*
		 * Try to acquire and then immediately release the console
		 * semaphore. The release will print out buffers. With the
		 * spinning variant, this context tries to take over the
		 * printing from another printing context.
		 */
		if (console_trylock_spinning())
			console_unlock();
		preempt_enable();
	}

	if (ft.legacy_offload)
		defer_console_output();
	else
		wake_up_klogd();

	return printed_len;
}
EXPORT_SYMBOL(vprintk_emit);

/*
 * Emit a message with facility 0, LOGLEVEL_DEFAULT and no device
 * information. Returns the value of vprintk_emit().
 */
int vprintk_default(const char *fmt, va_list args)
{
	return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
}
EXPORT_SYMBOL_GPL(vprintk_default);

/*
 * Variadic front end: packs the arguments into a va_list and forwards them
 * to vprintk(). Returns the value of vprintk().
 */
asmlinkage __visible int _printk(const char *fmt, ...)
{
	va_list args;
	int r;

	va_start(args, fmt);
	r = vprintk(fmt, args);
	va_end(args);

	return r;
}
EXPORT_SYMBOL(_printk);

/* Forward declarations; both are used further up in this section. */
static bool pr_flush(int timeout_ms, bool reset_on_progress);
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);

#else /* CONFIG_PRINTK */

/*
 * Without CONFIG_PRINTK there are no records: the ringbuffer accessors
 * collapse to constants and flushing trivially succeeds.
 */
#define printk_time		false

#define prb_read_valid(rb, seq, r)	false
#define prb_first_valid_seq(rb)		0
#define prb_next_seq(rb)		0

static u64 syslog_seq;

static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; }
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }

#endif /* CONFIG_PRINTK */

#ifdef CONFIG_EARLY_PRINTK
struct console *early_console;

/*
 * Format a message into an on-stack buffer and hand it directly to the
 * write() callback of @early_console. Does nothing when no early console
 * is set. The formatted length is bounded by the 512 byte buffer.
 */
asmlinkage __visible void early_printk(const char *fmt, ...)
{
	va_list ap;
	char buf[512];
	int n;

	if (!early_console)
		return;

	va_start(ap, fmt);
	n = vscnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	early_console->write(early_console, buf, n);
}
#endif

/*
 * Record that console @c was requested by the user. This only ever sets
 * the markers; calling it with @user_specified == false is a no-op.
 */
static void set_user_specified(struct console_cmdline *c, bool user_specified)
{
	if (!user_specified)
		return;

	/*
	 * @c console was defined by the user on the command line.
	 * Do not clear when added twice also by SPCR or the device tree.
	 */
	c->user_specified = true;
	/* At least one console defined by the user on the command line. */
	console_set_on_cmdline = 1;
}

/*
 * Add a console to, or find it in, the console_cmdline[] table.
 *
 * @name, @idx:     console driver name and index; @idx must not be
 *                  negative when @name is given.
 * @devname:        device name to match instead of (or besides) @name.
 * @options:        option string stored in a newly used slot.
 * @brl_options:    braille options; when set, @preferred_console is left
 *                  untouched.
 * @user_specified: passed to set_user_specified() for the entry.
 *
 * An already present entry (same @name and @idx, or same @devname) is
 * reused: only @preferred_console and the user_specified marker are
 * updated, its options are kept.
 *
 * Returns 0 on success, -EINVAL for invalid arguments and -E2BIG when all
 * MAX_CMDLINECONSOLES slots are in use.
 */
static int __add_preferred_console(const char *name, const short idx,
				   const char *devname, char *options,
				   char *brl_options, bool user_specified)
{
	struct console_cmdline *c;
	int i;

	if (!name && !devname)
		return -EINVAL;

	/*
	 * We use a signed short index for struct console for device drivers to
	 * indicate a not yet assigned index or port. However, a negative index
	 * value is not valid when the console name and index are defined on
	 * the command line.
	 */
	if (name && idx < 0)
		return -EINVAL;

	/*
	 *	See if this tty is not yet registered, and
	 *	if we have a slot free.
	 */
	for (i = 0, c = console_cmdline;
	     i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]);
	     i++, c++) {
		if ((name && strcmp(c->name, name) == 0 && c->index == idx) ||
		    (devname && strcmp(c->devname, devname) == 0)) {
			if (!brl_options)
				preferred_console = i;
			set_user_specified(c, user_specified);
			return 0;
		}
	}
	/* The loop stopped at the first unused slot, or ran off the table. */
	if (i == MAX_CMDLINECONSOLES)
		return -E2BIG;
	if (!brl_options)
		preferred_console = i;
	if (name)
		strscpy(c->name, name);
	if (devname)
		strscpy(c->devname, devname);
	c->options = options;
	set_user_specified(c, user_specified);
	braille_set_options(c, brl_options);

	c->index = idx;
	return 0;
}

/*
 * Handle the "console_msg_format=" boot parameter. "syslog" and "default"
 * select the respective format; any other value leaves @console_msg_format
 * unchanged. Always returns 1.
 */
static int __init console_msg_format_setup(char *str)
{
	if (!strcmp(str, "syslog"))
		console_msg_format = MSG_FORMAT_SYSLOG;
	if (!strcmp(str, "default"))
		console_msg_format = MSG_FORMAT_DEFAULT;
	return 1;
}
__setup("console_msg_format=", console_msg_format_setup);

/*
 * Set up a console.
Called via do_early_param() in init/main.c 2553 * for each "console=" parameter in the boot command line. 2554 */ 2555 static int __init console_setup(char *str) 2556 { 2557 static_assert(sizeof(console_cmdline[0].devname) >= sizeof(console_cmdline[0].name) + 4); 2558 char buf[sizeof(console_cmdline[0].devname)]; 2559 char *brl_options = NULL; 2560 char *ttyname = NULL; 2561 char *devname = NULL; 2562 char *options; 2563 char *s; 2564 int idx; 2565 2566 /* 2567 * console="" or console=null have been suggested as a way to 2568 * disable console output. Use ttynull that has been created 2569 * for exactly this purpose. 2570 */ 2571 if (str[0] == 0 || strcmp(str, "null") == 0) { 2572 __add_preferred_console("ttynull", 0, NULL, NULL, NULL, true); 2573 return 1; 2574 } 2575 2576 if (_braille_console_setup(&str, &brl_options)) 2577 return 1; 2578 2579 /* For a DEVNAME:0.0 style console the character device is unknown early */ 2580 if (strchr(str, ':')) 2581 devname = buf; 2582 else 2583 ttyname = buf; 2584 2585 /* 2586 * Decode str into name, index, options. 2587 */ 2588 if (ttyname && isdigit(str[0])) 2589 scnprintf(buf, sizeof(buf), "ttyS%s", str); 2590 else 2591 strscpy(buf, str); 2592 2593 options = strchr(str, ','); 2594 if (options) 2595 *(options++) = 0; 2596 2597 #ifdef __sparc__ 2598 if (!strcmp(str, "ttya")) 2599 strscpy(buf, "ttyS0"); 2600 if (!strcmp(str, "ttyb")) 2601 strscpy(buf, "ttyS1"); 2602 #endif 2603 2604 for (s = buf; *s; s++) 2605 if ((ttyname && isdigit(*s)) || *s == ',') 2606 break; 2607 2608 /* @idx will get defined when devname matches. */ 2609 if (devname) 2610 idx = -1; 2611 else 2612 idx = simple_strtoul(s, NULL, 10); 2613 2614 *s = 0; 2615 2616 __add_preferred_console(ttyname, idx, devname, options, brl_options, true); 2617 return 1; 2618 } 2619 __setup("console=", console_setup); 2620 2621 /** 2622 * add_preferred_console - add a device to the list of preferred consoles. 
 * @name: device name
 * @idx: device index
 * @options: options for this console
 *
 * The last preferred console added will be used for kernel messages
 * and stdin/out/err for init.  User-supplied console arguments are
 * handled by console_setup() above, which calls the same helper,
 * __add_preferred_console(), with the entry marked as user specified.
 * This function adds the entry without that mark and with neither a
 * devname nor braille options. It can be used by arch-specific code
 * either to override the user or more commonly to provide a default
 * console (i.e. from PROM variables) when the user has not supplied one.
 *
 * Return: the value of __add_preferred_console(): 0 on success, -EINVAL
 * for a missing @name or a negative @idx, -E2BIG when the table of
 * preferred consoles is full.
 */
int add_preferred_console(const char *name, const short idx, char *options)
{
	return __add_preferred_console(name, idx, NULL, options, NULL, false);
}

/**
 * match_devname_and_update_preferred_console - Update a preferred console
 *	when matching devname is found.
 * @devname: DEVNAME:0.0 style device name
 * @name: Name of the corresponding console driver, e.g. "ttyS"
 * @idx: Console index, e.g. port number.
 *
 * The function checks whether a device with the given @devname is
 * preferred via the console=DEVNAME:0.0 command line option.
 * It fills the missing console driver name and console index
 * so that a later register_console() call could find (match)
 * and enable this device.
 *
 * It might be used when a driver subsystem initializes particular
 * devices with already known DEVNAME:0.0 style names. And it
 * could predict which console driver name and index this device
 * would later get associated with.
 *
 * Return: 0 on success, negative error code on failure.
2658 */ 2659 int match_devname_and_update_preferred_console(const char *devname, 2660 const char *name, 2661 const short idx) 2662 { 2663 struct console_cmdline *c = console_cmdline; 2664 int i; 2665 2666 if (!devname || !strlen(devname) || !name || !strlen(name) || idx < 0) 2667 return -EINVAL; 2668 2669 for (i = 0; i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); 2670 i++, c++) { 2671 if (!strcmp(devname, c->devname)) { 2672 pr_info("associate the preferred console \"%s\" with \"%s%d\"\n", 2673 devname, name, idx); 2674 strscpy(c->name, name); 2675 c->index = idx; 2676 return 0; 2677 } 2678 } 2679 2680 return -ENOENT; 2681 } 2682 EXPORT_SYMBOL_GPL(match_devname_and_update_preferred_console); 2683 2684 bool console_suspend_enabled = true; 2685 EXPORT_SYMBOL(console_suspend_enabled); 2686 2687 static int __init console_suspend_disable(char *str) 2688 { 2689 console_suspend_enabled = false; 2690 return 1; 2691 } 2692 __setup("no_console_suspend", console_suspend_disable); 2693 module_param_named(console_suspend, console_suspend_enabled, 2694 bool, S_IRUGO | S_IWUSR); 2695 MODULE_PARM_DESC(console_suspend, "suspend console during suspend" 2696 " and hibernate operations"); 2697 2698 static bool printk_console_no_auto_verbose; 2699 2700 void console_verbose(void) 2701 { 2702 if (console_loglevel && !printk_console_no_auto_verbose) 2703 console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; 2704 } 2705 EXPORT_SYMBOL_GPL(console_verbose); 2706 2707 module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool, 0644); 2708 MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc"); 2709 2710 /** 2711 * suspend_console - suspend the console subsystem 2712 * 2713 * This disables printk() while we go into suspend states 2714 */ 2715 void suspend_console(void) 2716 { 2717 struct console *con; 2718 2719 if (!console_suspend_enabled) 2720 return; 2721 pr_info("Suspending console(s) (use no_console_suspend to 
debug)\n"); 2722 pr_flush(1000, true); 2723 2724 console_list_lock(); 2725 for_each_console(con) 2726 console_srcu_write_flags(con, con->flags | CON_SUSPENDED); 2727 console_list_unlock(); 2728 2729 /* 2730 * Ensure that all SRCU list walks have completed. All printing 2731 * contexts must be able to see that they are suspended so that it 2732 * is guaranteed that all printing has stopped when this function 2733 * completes. 2734 */ 2735 synchronize_srcu(&console_srcu); 2736 } 2737 2738 void resume_console(void) 2739 { 2740 struct console_flush_type ft; 2741 struct console *con; 2742 2743 if (!console_suspend_enabled) 2744 return; 2745 2746 console_list_lock(); 2747 for_each_console(con) 2748 console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); 2749 console_list_unlock(); 2750 2751 /* 2752 * Ensure that all SRCU list walks have completed. All printing 2753 * contexts must be able to see they are no longer suspended so 2754 * that they are guaranteed to wake up and resume printing. 2755 */ 2756 synchronize_srcu(&console_srcu); 2757 2758 printk_get_console_flush_type(&ft); 2759 if (ft.nbcon_offload) 2760 nbcon_kthreads_wake(); 2761 if (ft.legacy_offload) 2762 defer_console_output(); 2763 2764 pr_flush(1000, true); 2765 } 2766 2767 /** 2768 * console_cpu_notify - print deferred console messages after CPU hotplug 2769 * @cpu: unused 2770 * 2771 * If printk() is called from a CPU that is not online yet, the messages 2772 * will be printed on the console only if there are CON_ANYTIME consoles. 2773 * This function is called when a new CPU comes online (or fails to come 2774 * up) or goes offline. 
2775 */ 2776 static int console_cpu_notify(unsigned int cpu) 2777 { 2778 struct console_flush_type ft; 2779 2780 if (!cpuhp_tasks_frozen) { 2781 printk_get_console_flush_type(&ft); 2782 if (ft.nbcon_atomic) 2783 nbcon_atomic_flush_pending(); 2784 if (ft.legacy_direct) { 2785 if (console_trylock()) 2786 console_unlock(); 2787 } 2788 } 2789 return 0; 2790 } 2791 2792 /** 2793 * console_lock - block the console subsystem from printing 2794 * 2795 * Acquires a lock which guarantees that no consoles will 2796 * be in or enter their write() callback. 2797 * 2798 * Can sleep, returns nothing. 2799 */ 2800 void console_lock(void) 2801 { 2802 might_sleep(); 2803 2804 /* On panic, the console_lock must be left to the panic cpu. */ 2805 while (other_cpu_in_panic()) 2806 msleep(1000); 2807 2808 down_console_sem(); 2809 console_locked = 1; 2810 console_may_schedule = 1; 2811 } 2812 EXPORT_SYMBOL(console_lock); 2813 2814 /** 2815 * console_trylock - try to block the console subsystem from printing 2816 * 2817 * Try to acquire a lock which guarantees that no consoles will 2818 * be in or enter their write() callback. 2819 * 2820 * returns 1 on success, and 0 on failure to acquire the lock. 2821 */ 2822 int console_trylock(void) 2823 { 2824 /* On panic, the console_lock must be left to the panic cpu. */ 2825 if (other_cpu_in_panic()) 2826 return 0; 2827 if (down_trylock_console_sem()) 2828 return 0; 2829 console_locked = 1; 2830 console_may_schedule = 0; 2831 return 1; 2832 } 2833 EXPORT_SYMBOL(console_trylock); 2834 2835 int is_console_locked(void) 2836 { 2837 return console_locked; 2838 } 2839 EXPORT_SYMBOL(is_console_locked); 2840 2841 static void __console_unlock(void) 2842 { 2843 console_locked = 0; 2844 up_console_sem(); 2845 } 2846 2847 #ifdef CONFIG_PRINTK 2848 2849 /* 2850 * Prepend the message in @pmsg->pbufs->outbuf. This is achieved by shifting 2851 * the existing message over and inserting the scratchbuf message. 2852 * 2853 * @pmsg is the original printk message. 
2854 * @fmt is the printf format of the message which will prepend the existing one. 2855 * 2856 * If there is not enough space in @pmsg->pbufs->outbuf, the existing 2857 * message text will be sufficiently truncated. 2858 * 2859 * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. 2860 */ 2861 __printf(2, 3) 2862 static void console_prepend_message(struct printk_message *pmsg, const char *fmt, ...) 2863 { 2864 struct printk_buffers *pbufs = pmsg->pbufs; 2865 const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); 2866 const size_t outbuf_sz = sizeof(pbufs->outbuf); 2867 char *scratchbuf = &pbufs->scratchbuf[0]; 2868 char *outbuf = &pbufs->outbuf[0]; 2869 va_list args; 2870 size_t len; 2871 2872 va_start(args, fmt); 2873 len = vscnprintf(scratchbuf, scratchbuf_sz, fmt, args); 2874 va_end(args); 2875 2876 /* 2877 * Make sure outbuf is sufficiently large before prepending. 2878 * Keep at least the prefix when the message must be truncated. 2879 * It is a rather theoretical problem when someone tries to 2880 * use a minimalist buffer. 2881 */ 2882 if (WARN_ON_ONCE(len + PRINTK_PREFIX_MAX >= outbuf_sz)) 2883 return; 2884 2885 if (pmsg->outbuf_len + len >= outbuf_sz) { 2886 /* Truncate the message, but keep it terminated. */ 2887 pmsg->outbuf_len = outbuf_sz - (len + 1); 2888 outbuf[pmsg->outbuf_len] = 0; 2889 } 2890 2891 memmove(outbuf + len, outbuf, pmsg->outbuf_len + 1); 2892 memcpy(outbuf, scratchbuf, len); 2893 pmsg->outbuf_len += len; 2894 } 2895 2896 /* 2897 * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". 2898 * @pmsg->outbuf_len is updated appropriately. 2899 * 2900 * @pmsg is the printk message to prepend. 2901 * 2902 * @dropped is the dropped count to report in the dropped message. 
2903 */ 2904 void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) 2905 { 2906 console_prepend_message(pmsg, "** %lu printk messages dropped **\n", dropped); 2907 } 2908 2909 /* 2910 * Prepend the message in @pmsg->pbufs->outbuf with a "replay message". 2911 * @pmsg->outbuf_len is updated appropriately. 2912 * 2913 * @pmsg is the printk message to prepend. 2914 */ 2915 void console_prepend_replay(struct printk_message *pmsg) 2916 { 2917 console_prepend_message(pmsg, "** replaying previous printk message **\n"); 2918 } 2919 2920 /* 2921 * Read and format the specified record (or a later record if the specified 2922 * record is not available). 2923 * 2924 * @pmsg will contain the formatted result. @pmsg->pbufs must point to a 2925 * struct printk_buffers. 2926 * 2927 * @seq is the record to read and format. If it is not available, the next 2928 * valid record is read. 2929 * 2930 * @is_extended specifies if the message should be formatted for extended 2931 * console output. 2932 * 2933 * @may_supress specifies if records may be skipped based on loglevel. 2934 * 2935 * Returns false if no record is available. Otherwise true and all fields 2936 * of @pmsg are valid. (See the documentation of struct printk_message 2937 * for information about the @pmsg fields.) 2938 */ 2939 bool printk_get_next_message(struct printk_message *pmsg, u64 seq, 2940 bool is_extended, bool may_suppress) 2941 { 2942 struct printk_buffers *pbufs = pmsg->pbufs; 2943 const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); 2944 const size_t outbuf_sz = sizeof(pbufs->outbuf); 2945 char *scratchbuf = &pbufs->scratchbuf[0]; 2946 char *outbuf = &pbufs->outbuf[0]; 2947 struct printk_info info; 2948 struct printk_record r; 2949 size_t len = 0; 2950 2951 /* 2952 * Formatting extended messages requires a separate buffer, so use the 2953 * scratch buffer to read in the ringbuffer text. 
2954 * 2955 * Formatting normal messages is done in-place, so read the ringbuffer 2956 * text directly into the output buffer. 2957 */ 2958 if (is_extended) 2959 prb_rec_init_rd(&r, &info, scratchbuf, scratchbuf_sz); 2960 else 2961 prb_rec_init_rd(&r, &info, outbuf, outbuf_sz); 2962 2963 if (!prb_read_valid(prb, seq, &r)) 2964 return false; 2965 2966 pmsg->seq = r.info->seq; 2967 pmsg->dropped = r.info->seq - seq; 2968 2969 /* Skip record that has level above the console loglevel. */ 2970 if (may_suppress && suppress_message_printing(r.info->level)) 2971 goto out; 2972 2973 if (is_extended) { 2974 len = info_print_ext_header(outbuf, outbuf_sz, r.info); 2975 len += msg_print_ext_body(outbuf + len, outbuf_sz - len, 2976 &r.text_buf[0], r.info->text_len, &r.info->dev_info); 2977 } else { 2978 len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); 2979 } 2980 out: 2981 pmsg->outbuf_len = len; 2982 return true; 2983 } 2984 2985 /* 2986 * Legacy console printing from printk() caller context does not respect 2987 * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a 2988 * false positive. For PREEMPT_RT the false positive condition does not 2989 * occur. 2990 * 2991 * This map is used to temporarily establish LD_WAIT_SLEEP context for the 2992 * console write() callback when legacy printing to avoid false positive 2993 * lockdep complaints, thus allowing lockdep to continue to function for 2994 * real issues. 
 */
#ifdef CONFIG_PREEMPT_RT
/* Nothing to override on PREEMPT_RT. */
static inline void printk_legacy_allow_spinlock_enter(void) { }
static inline void printk_legacy_allow_spinlock_exit(void) { }
#else
static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP);

/* Enter the override; must be paired with the _exit() variant. */
static inline void printk_legacy_allow_spinlock_enter(void)
{
	lock_map_acquire_try(&printk_legacy_map);
}

/* Leave the override established by the _enter() variant. */
static inline void printk_legacy_allow_spinlock_exit(void)
{
	lock_map_release(&printk_legacy_map);
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * Used as the printk buffers for non-panic, serialized console printing.
 * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles.
 * Its usage requires the console_lock held.
 */
struct printk_buffers printk_shared_pbufs;

/*
 * Print one record for the given console. The record printed is whatever
 * record is the next available record for the given console.
 *
 * @handover will be set to true if a printk waiter has taken over the
 * console_lock, in which case the caller is no longer holding both the
 * console_lock and the SRCU read lock. Otherwise it is set to false.
 *
 * @cookie is the cookie from the SRCU read lock.
 *
 * Returns false if the given console has no next record to print, otherwise
 * true. True is also returned when the record was consumed without calling
 * write() because its formatted length is 0.
 *
 * Requires the console_lock and the SRCU read lock.
 */
static bool console_emit_next_record(struct console *con, bool *handover, int cookie)
{
	bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED;
	char *outbuf = &printk_shared_pbufs.outbuf[0];
	struct printk_message pmsg = {
		.pbufs = &printk_shared_pbufs,
	};
	unsigned long flags;

	*handover = false;

	if (!printk_get_next_message(&pmsg, con->seq, is_extended, true))
		return false;

	/* Account for the records between @con->seq and the one read. */
	con->dropped += pmsg.dropped;

	/* Skip messages of formatted length 0. */
	if (pmsg.outbuf_len == 0) {
		/* The record still counts as consumed. */
		con->seq = pmsg.seq + 1;
		goto skip;
	}

	/* Only the non-extended format gets the "dropped" notice prepended. */
	if (con->dropped && !is_extended) {
		console_prepend_dropped(&pmsg, con->dropped);
		con->dropped = 0;
	}

	/* Write everything out to the hardware. */

	if (force_legacy_kthread() && !panic_in_progress()) {
		/*
		 * With forced threading this function is in a task context
		 * (either legacy kthread or get_init_console_seq()). There
		 * is no need for concern about printk reentrance, handovers,
		 * or lockdep complaints.
		 */

		con->write(con, outbuf, pmsg.outbuf_len);
		con->seq = pmsg.seq + 1;
	} else {
		/*
		 * While actively printing out messages, if another printk()
		 * were to occur on another CPU, it may wait for this one to
		 * finish. This task can not be preempted if there is a
		 * waiter waiting to take over.
		 *
		 * Interrupts are disabled because the hand over to a waiter
		 * must not be interrupted until the hand over is completed
		 * (@console_waiter is cleared).
		 */
		printk_safe_enter_irqsave(flags);
		console_lock_spinning_enable();

		/* Do not trace print latency. */
		stop_critical_timings();

		printk_legacy_allow_spinlock_enter();
		con->write(con, outbuf, pmsg.outbuf_len);
		printk_legacy_allow_spinlock_exit();

		start_critical_timings();

		con->seq = pmsg.seq + 1;

		*handover = console_lock_spinning_disable_and_check(cookie);
		printk_safe_exit_irqrestore(flags);
	}
skip:
	return true;
}

#else

/* Without CONFIG_PRINTK there is never a record to emit. */
static bool console_emit_next_record(struct console *con, bool *handover, int cookie)
{
	*handover = false;
	return false;
}

static inline void printk_kthreads_check_locked(void) { }

#endif /* CONFIG_PRINTK */

/*
 * Print out all remaining records to all consoles.
 *
 * @do_cond_resched is set by the caller.
 * It can be true only in schedulable
 * context.
 *
 * @next_seq is set to the sequence number after the last available record.
 * The value is valid only when this function returns true. It means that all
 * usable consoles are completely flushed.
 *
 * @handover will be set to true if a printk waiter has taken over the
 * console_lock, in which case the caller is no longer holding the
 * console_lock. Otherwise it is set to false.
 *
 * Returns true when there was at least one usable console and all messages
 * were flushed to all usable consoles. A returned false informs the caller
 * that everything was not flushed (either there were no usable consoles or
 * another context has taken over printing or it is a panic situation and this
 * is not the panic CPU). Regardless the reason, the caller should assume it
 * is not useful to immediately try again.
 *
 * Requires the console_lock.
 */
static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
{
	struct console_flush_type ft;
	bool any_usable = false;
	struct console *con;
	bool any_progress;
	int cookie;

	*next_seq = 0;
	*handover = false;

	/* Each pass emits at most one record per console. */
	do {
		any_progress = false;

		printk_get_console_flush_type(&ft);

		cookie = console_srcu_read_lock();
		for_each_console_srcu(con) {
			short flags = console_srcu_read_flags(con);
			u64 printk_seq;
			bool progress;

			/*
			 * console_flush_all() is only responsible for nbcon
			 * consoles when the nbcon consoles cannot print via
			 * their atomic or threaded flushing.
			 */
			if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
				continue;

			if (!console_is_usable(con, flags, !do_cond_resched))
				continue;
			any_usable = true;

			if (flags & CON_NBCON) {
				progress = nbcon_legacy_emit_next_record(con, handover, cookie,
									 !do_cond_resched);
				printk_seq = nbcon_seq_read(con);
			} else {
				progress = console_emit_next_record(con, handover, cookie);
				printk_seq = con->seq;
			}

			/*
			 * If a handover has occurred, the SRCU read lock
			 * is already released.
			 */
			if (*handover)
				return false;

			/* Track the next of the highest seq flushed. */
			if (printk_seq > *next_seq)
				*next_seq = printk_seq;

			if (!progress)
				continue;
			any_progress = true;

			/* Allow panic_cpu to take over the consoles safely. */
			if (other_cpu_in_panic())
				goto abandon;

			if (do_cond_resched)
				cond_resched();
		}
		console_srcu_read_unlock(cookie);
	} while (any_progress);

	return any_usable;

abandon:
	/* Unlike the handover case, the SRCU read lock is still held here. */
	console_srcu_read_unlock(cookie);
	return false;
}

/*
 * Flush all pending records to the consoles and release the console_lock.
 * Flushing is repeated for as long as new records have shown up in the
 * meantime and the console_lock can be taken again.
 *
 * Requires the console_lock. On return the lock has either been released
 * or handed over to a printk waiter.
 */
static void __console_flush_and_unlock(void)
{
	bool do_cond_resched;
	bool handover;
	bool flushed;
	u64 next_seq;

	/*
	 * Console drivers are called with interrupts disabled, so
	 * @console_may_schedule should be cleared before; however, we may
	 * end up dumping a lot of lines, for example, if called from
	 * console registration path, and should invoke cond_resched()
	 * between lines if allowable.  Not doing so can cause a very long
	 * scheduling stall on a slow console leading to RCU stall and
	 * softlockup warnings which exacerbate the issue with more
	 * messages practically incapacitating the system. Therefore, create
	 * a local to use for the printing loop.
	 */
	do_cond_resched = console_may_schedule;

	do {
		console_may_schedule = 0;

		flushed = console_flush_all(do_cond_resched, &next_seq, &handover);
		/* After a handover the lock belongs to the waiter. */
		if (!handover)
			__console_unlock();

		/*
		 * Abort if there was a failure to flush all messages to all
		 * usable consoles. Either it is not possible to flush (in
		 * which case it would be an infinite loop of retrying) or
		 * another context has taken over printing.
		 */
		if (!flushed)
			break;

		/*
		 * Some context may have added new records after
		 * console_flush_all() but before unlocking the console.
		 * Re-check if there is a new record to flush. If the trylock
		 * fails, another context is already handling the printing.
		 */
	} while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
}

/**
 * console_unlock - unblock the legacy console subsystem from printing
 *
 * Releases the console_lock which the caller holds to block printing of
 * the legacy console subsystem.
 *
 * While the console_lock was held, console output may have been buffered
 * by printk(). If this is the case, console_unlock() emits the output on
 * legacy consoles prior to releasing the lock.
 *
 * console_unlock(); may be called from any context.
 */
void console_unlock(void)
{
	struct console_flush_type ft;

	printk_get_console_flush_type(&ft);
	/* Only flush from here when legacy consoles print directly. */
	if (ft.legacy_direct)
		__console_flush_and_unlock();
	else
		__console_unlock();
}
EXPORT_SYMBOL(console_unlock);

/**
 * console_conditional_schedule - yield the CPU if required
 *
 * If the console code is currently allowed to sleep, and
 * if this CPU should yield the CPU to another task, do
 * so here.
 *
 * Must be called within console_lock();.
3293 */ 3294 void __sched console_conditional_schedule(void) 3295 { 3296 if (console_may_schedule) 3297 cond_resched(); 3298 } 3299 EXPORT_SYMBOL(console_conditional_schedule); 3300 3301 void console_unblank(void) 3302 { 3303 bool found_unblank = false; 3304 struct console *c; 3305 int cookie; 3306 3307 /* 3308 * First check if there are any consoles implementing the unblank() 3309 * callback. If not, there is no reason to continue and take the 3310 * console lock, which in particular can be dangerous if 3311 * @oops_in_progress is set. 3312 */ 3313 cookie = console_srcu_read_lock(); 3314 for_each_console_srcu(c) { 3315 if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) { 3316 found_unblank = true; 3317 break; 3318 } 3319 } 3320 console_srcu_read_unlock(cookie); 3321 if (!found_unblank) 3322 return; 3323 3324 /* 3325 * Stop console printing because the unblank() callback may 3326 * assume the console is not within its write() callback. 3327 * 3328 * If @oops_in_progress is set, this may be an atomic context. 3329 * In that case, attempt a trylock as best-effort. 3330 */ 3331 if (oops_in_progress) { 3332 /* Semaphores are not NMI-safe. */ 3333 if (in_nmi()) 3334 return; 3335 3336 /* 3337 * Attempting to trylock the console lock can deadlock 3338 * if another CPU was stopped while modifying the 3339 * semaphore. "Hope and pray" that this is not the 3340 * current situation. 3341 */ 3342 if (down_trylock_console_sem() != 0) 3343 return; 3344 } else 3345 console_lock(); 3346 3347 console_locked = 1; 3348 console_may_schedule = 0; 3349 3350 cookie = console_srcu_read_lock(); 3351 for_each_console_srcu(c) { 3352 if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) 3353 c->unblank(); 3354 } 3355 console_srcu_read_unlock(cookie); 3356 3357 console_unlock(); 3358 3359 if (!oops_in_progress) 3360 pr_flush(1000, true); 3361 } 3362 3363 /* 3364 * Rewind all consoles to the oldest available record. 
3365 * 3366 * IMPORTANT: The function is safe only when called under 3367 * console_lock(). It is not enforced because 3368 * it is used as a best effort in panic(). 3369 */ 3370 static void __console_rewind_all(void) 3371 { 3372 struct console *c; 3373 short flags; 3374 int cookie; 3375 u64 seq; 3376 3377 seq = prb_first_valid_seq(prb); 3378 3379 cookie = console_srcu_read_lock(); 3380 for_each_console_srcu(c) { 3381 flags = console_srcu_read_flags(c); 3382 3383 if (flags & CON_NBCON) { 3384 nbcon_seq_force(c, seq); 3385 } else { 3386 /* 3387 * This assignment is safe only when called under 3388 * console_lock(). On panic, legacy consoles are 3389 * only best effort. 3390 */ 3391 c->seq = seq; 3392 } 3393 } 3394 console_srcu_read_unlock(cookie); 3395 } 3396 3397 /** 3398 * console_flush_on_panic - flush console content on panic 3399 * @mode: flush all messages in buffer or just the pending ones 3400 * 3401 * Immediately output all pending messages no matter what. 3402 */ 3403 void console_flush_on_panic(enum con_flush_mode mode) 3404 { 3405 struct console_flush_type ft; 3406 bool handover; 3407 u64 next_seq; 3408 3409 /* 3410 * Ignore the console lock and flush out the messages. Attempting a 3411 * trylock would not be useful because: 3412 * 3413 * - if it is contended, it must be ignored anyway 3414 * - console_lock() and console_trylock() block and fail 3415 * respectively in panic for non-panic CPUs 3416 * - semaphores are not NMI-safe 3417 */ 3418 3419 /* 3420 * If another context is holding the console lock, 3421 * @console_may_schedule might be set. Clear it so that 3422 * this context does not call cond_resched() while flushing. 3423 */ 3424 console_may_schedule = 0; 3425 3426 if (mode == CONSOLE_REPLAY_ALL) 3427 __console_rewind_all(); 3428 3429 printk_get_console_flush_type(&ft); 3430 if (ft.nbcon_atomic) 3431 nbcon_atomic_flush_pending(); 3432 3433 /* Flush legacy consoles once allowed, even when dangerous. 
*/ 3434 if (legacy_allow_panic_sync) 3435 console_flush_all(false, &next_seq, &handover); 3436 } 3437 3438 /* 3439 * Return the console tty driver structure and its associated index 3440 */ 3441 struct tty_driver *console_device(int *index) 3442 { 3443 struct console *c; 3444 struct tty_driver *driver = NULL; 3445 int cookie; 3446 3447 /* 3448 * Take console_lock to serialize device() callback with 3449 * other console operations. For example, fg_console is 3450 * modified under console_lock when switching vt. 3451 */ 3452 console_lock(); 3453 3454 cookie = console_srcu_read_lock(); 3455 for_each_console_srcu(c) { 3456 if (!c->device) 3457 continue; 3458 driver = c->device(c, index); 3459 if (driver) 3460 break; 3461 } 3462 console_srcu_read_unlock(cookie); 3463 3464 console_unlock(); 3465 return driver; 3466 } 3467 3468 /* 3469 * Prevent further output on the passed console device so that (for example) 3470 * serial drivers can disable console output before suspending a port, and can 3471 * re-enable output afterwards. 3472 */ 3473 void console_stop(struct console *console) 3474 { 3475 __pr_flush(console, 1000, true); 3476 console_list_lock(); 3477 console_srcu_write_flags(console, console->flags & ~CON_ENABLED); 3478 console_list_unlock(); 3479 3480 /* 3481 * Ensure that all SRCU list walks have completed. All contexts must 3482 * be able to see that this console is disabled so that (for example) 3483 * the caller can suspend the port without risk of another context 3484 * using the port. 3485 */ 3486 synchronize_srcu(&console_srcu); 3487 } 3488 EXPORT_SYMBOL(console_stop); 3489 3490 void console_start(struct console *console) 3491 { 3492 struct console_flush_type ft; 3493 bool is_nbcon; 3494 3495 console_list_lock(); 3496 console_srcu_write_flags(console, console->flags | CON_ENABLED); 3497 is_nbcon = console->flags & CON_NBCON; 3498 console_list_unlock(); 3499 3500 /* 3501 * Ensure that all SRCU list walks have completed. 
The related 3502 * printing context must be able to see it is enabled so that 3503 * it is guaranteed to wake up and resume printing. 3504 */ 3505 synchronize_srcu(&console_srcu); 3506 3507 printk_get_console_flush_type(&ft); 3508 if (is_nbcon && ft.nbcon_offload) 3509 nbcon_kthread_wake(console); 3510 else if (ft.legacy_offload) 3511 defer_console_output(); 3512 3513 __pr_flush(console, 1000, true); 3514 } 3515 EXPORT_SYMBOL(console_start); 3516 3517 #ifdef CONFIG_PRINTK 3518 static int unregister_console_locked(struct console *console); 3519 3520 /* True when system boot is far enough to create printer threads. */ 3521 static bool printk_kthreads_ready __ro_after_init; 3522 3523 static struct task_struct *printk_legacy_kthread; 3524 3525 static bool legacy_kthread_should_wakeup(void) 3526 { 3527 struct console_flush_type ft; 3528 struct console *con; 3529 bool ret = false; 3530 int cookie; 3531 3532 if (kthread_should_stop()) 3533 return true; 3534 3535 printk_get_console_flush_type(&ft); 3536 3537 cookie = console_srcu_read_lock(); 3538 for_each_console_srcu(con) { 3539 short flags = console_srcu_read_flags(con); 3540 u64 printk_seq; 3541 3542 /* 3543 * The legacy printer thread is only responsible for nbcon 3544 * consoles when the nbcon consoles cannot print via their 3545 * atomic or threaded flushing. 3546 */ 3547 if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload)) 3548 continue; 3549 3550 if (!console_is_usable(con, flags, false)) 3551 continue; 3552 3553 if (flags & CON_NBCON) { 3554 printk_seq = nbcon_seq_read(con); 3555 } else { 3556 /* 3557 * It is safe to read @seq because only this 3558 * thread context updates @seq. 
3559 */ 3560 printk_seq = con->seq; 3561 } 3562 3563 if (prb_read_valid(prb, printk_seq, NULL)) { 3564 ret = true; 3565 break; 3566 } 3567 } 3568 console_srcu_read_unlock(cookie); 3569 3570 return ret; 3571 } 3572 3573 static int legacy_kthread_func(void *unused) 3574 { 3575 for (;;) { 3576 wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup()); 3577 3578 if (kthread_should_stop()) 3579 break; 3580 3581 console_lock(); 3582 __console_flush_and_unlock(); 3583 } 3584 3585 return 0; 3586 } 3587 3588 static bool legacy_kthread_create(void) 3589 { 3590 struct task_struct *kt; 3591 3592 lockdep_assert_console_list_lock_held(); 3593 3594 kt = kthread_run(legacy_kthread_func, NULL, "pr/legacy"); 3595 if (WARN_ON(IS_ERR(kt))) { 3596 pr_err("failed to start legacy printing thread\n"); 3597 return false; 3598 } 3599 3600 printk_legacy_kthread = kt; 3601 3602 /* 3603 * It is important that console printing threads are scheduled 3604 * shortly after a printk call and with generous runtime budgets. 3605 */ 3606 sched_set_normal(printk_legacy_kthread, -20); 3607 3608 return true; 3609 } 3610 3611 /** 3612 * printk_kthreads_shutdown - shutdown all threaded printers 3613 * 3614 * On system shutdown all threaded printers are stopped. This allows printk 3615 * to transition back to atomic printing, thus providing a robust mechanism 3616 * for the final shutdown/reboot messages to be output. 3617 */ 3618 static void printk_kthreads_shutdown(void) 3619 { 3620 struct console *con; 3621 3622 console_list_lock(); 3623 if (printk_kthreads_running) { 3624 printk_kthreads_running = false; 3625 3626 for_each_console(con) { 3627 if (con->flags & CON_NBCON) 3628 nbcon_kthread_stop(con); 3629 } 3630 3631 /* 3632 * The threads may have been stopped while printing a 3633 * backlog. Flush any records left over. 
3634 */ 3635 nbcon_atomic_flush_pending(); 3636 } 3637 console_list_unlock(); 3638 } 3639 3640 static struct syscore_ops printk_syscore_ops = { 3641 .shutdown = printk_kthreads_shutdown, 3642 }; 3643 3644 /* 3645 * If appropriate, start nbcon kthreads and set @printk_kthreads_running. 3646 * If any kthreads fail to start, those consoles are unregistered. 3647 * 3648 * Must be called under console_list_lock(). 3649 */ 3650 static void printk_kthreads_check_locked(void) 3651 { 3652 struct hlist_node *tmp; 3653 struct console *con; 3654 3655 lockdep_assert_console_list_lock_held(); 3656 3657 if (!printk_kthreads_ready) 3658 return; 3659 3660 if (have_legacy_console || have_boot_console) { 3661 if (!printk_legacy_kthread && 3662 force_legacy_kthread() && 3663 !legacy_kthread_create()) { 3664 /* 3665 * All legacy consoles must be unregistered. If there 3666 * are any nbcon consoles, they will set up their own 3667 * kthread. 3668 */ 3669 hlist_for_each_entry_safe(con, tmp, &console_list, node) { 3670 if (con->flags & CON_NBCON) 3671 continue; 3672 3673 unregister_console_locked(con); 3674 } 3675 } 3676 } else if (printk_legacy_kthread) { 3677 kthread_stop(printk_legacy_kthread); 3678 printk_legacy_kthread = NULL; 3679 } 3680 3681 /* 3682 * Printer threads cannot be started as long as any boot console is 3683 * registered because there is no way to synchronize the hardware 3684 * registers between boot console code and regular console code. 3685 * It can only be known that there will be no new boot consoles when 3686 * an nbcon console is registered. 3687 */ 3688 if (have_boot_console || !have_nbcon_console) { 3689 /* Clear flag in case all nbcon consoles unregistered. 
*/ 3690 printk_kthreads_running = false; 3691 return; 3692 } 3693 3694 if (printk_kthreads_running) 3695 return; 3696 3697 hlist_for_each_entry_safe(con, tmp, &console_list, node) { 3698 if (!(con->flags & CON_NBCON)) 3699 continue; 3700 3701 if (!nbcon_kthread_create(con)) 3702 unregister_console_locked(con); 3703 } 3704 3705 printk_kthreads_running = true; 3706 } 3707 3708 static int __init printk_set_kthreads_ready(void) 3709 { 3710 register_syscore_ops(&printk_syscore_ops); 3711 3712 console_list_lock(); 3713 printk_kthreads_ready = true; 3714 printk_kthreads_check_locked(); 3715 console_list_unlock(); 3716 3717 return 0; 3718 } 3719 early_initcall(printk_set_kthreads_ready); 3720 #endif /* CONFIG_PRINTK */ 3721 3722 static int __read_mostly keep_bootcon; 3723 3724 static int __init keep_bootcon_setup(char *str) 3725 { 3726 keep_bootcon = 1; 3727 pr_info("debug: skip boot console de-registration.\n"); 3728 3729 return 0; 3730 } 3731 3732 early_param("keep_bootcon", keep_bootcon_setup); 3733 3734 static int console_call_setup(struct console *newcon, char *options) 3735 { 3736 int err; 3737 3738 if (!newcon->setup) 3739 return 0; 3740 3741 /* Synchronize with possible boot console. */ 3742 console_lock(); 3743 err = newcon->setup(newcon, options); 3744 console_unlock(); 3745 3746 return err; 3747 } 3748 3749 /* 3750 * This is called by register_console() to try to match 3751 * the newly registered console with any of the ones selected 3752 * by either the command line or add_preferred_console() and 3753 * setup/enable it. 3754 * 3755 * Care need to be taken with consoles that are statically 3756 * enabled such as netconsole 3757 */ 3758 static int try_enable_preferred_console(struct console *newcon, 3759 bool user_specified) 3760 { 3761 struct console_cmdline *c; 3762 int i, err; 3763 3764 for (i = 0, c = console_cmdline; 3765 i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); 3766 i++, c++) { 3767 /* Console not yet initialized? 
*/ 3768 if (!c->name[0]) 3769 continue; 3770 if (c->user_specified != user_specified) 3771 continue; 3772 if (!newcon->match || 3773 newcon->match(newcon, c->name, c->index, c->options) != 0) { 3774 /* default matching */ 3775 BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); 3776 if (strcmp(c->name, newcon->name) != 0) 3777 continue; 3778 if (newcon->index >= 0 && 3779 newcon->index != c->index) 3780 continue; 3781 if (newcon->index < 0) 3782 newcon->index = c->index; 3783 3784 if (_braille_register_console(newcon, c)) 3785 return 0; 3786 3787 err = console_call_setup(newcon, c->options); 3788 if (err) 3789 return err; 3790 } 3791 newcon->flags |= CON_ENABLED; 3792 if (i == preferred_console) 3793 newcon->flags |= CON_CONSDEV; 3794 return 0; 3795 } 3796 3797 /* 3798 * Some consoles, such as pstore and netconsole, can be enabled even 3799 * without matching. Accept the pre-enabled consoles only when match() 3800 * and setup() had a chance to be called. 3801 */ 3802 if (newcon->flags & CON_ENABLED && c->user_specified == user_specified) 3803 return 0; 3804 3805 return -ENOENT; 3806 } 3807 3808 /* Try to enable the console unconditionally */ 3809 static void try_enable_default_console(struct console *newcon) 3810 { 3811 if (newcon->index < 0) 3812 newcon->index = 0; 3813 3814 if (console_call_setup(newcon, NULL) != 0) 3815 return; 3816 3817 newcon->flags |= CON_ENABLED; 3818 3819 if (newcon->device) 3820 newcon->flags |= CON_CONSDEV; 3821 } 3822 3823 /* Return the starting sequence number for a newly registered console. */ 3824 static u64 get_init_console_seq(struct console *newcon, bool bootcon_registered) 3825 { 3826 struct console *con; 3827 bool handover; 3828 u64 init_seq; 3829 3830 if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) { 3831 /* Get a consistent copy of @syslog_seq. */ 3832 mutex_lock(&syslog_lock); 3833 init_seq = syslog_seq; 3834 mutex_unlock(&syslog_lock); 3835 } else { 3836 /* Begin with next message added to ringbuffer. 
*/ 3837 init_seq = prb_next_seq(prb); 3838 3839 /* 3840 * If any enabled boot consoles are due to be unregistered 3841 * shortly, some may not be caught up and may be the same 3842 * device as @newcon. Since it is not known which boot console 3843 * is the same device, flush all consoles and, if necessary, 3844 * start with the message of the enabled boot console that is 3845 * the furthest behind. 3846 */ 3847 if (bootcon_registered && !keep_bootcon) { 3848 /* 3849 * Hold the console_lock to stop console printing and 3850 * guarantee safe access to console->seq. 3851 */ 3852 console_lock(); 3853 3854 /* 3855 * Flush all consoles and set the console to start at 3856 * the next unprinted sequence number. 3857 */ 3858 if (!console_flush_all(true, &init_seq, &handover)) { 3859 /* 3860 * Flushing failed. Just choose the lowest 3861 * sequence of the enabled boot consoles. 3862 */ 3863 3864 /* 3865 * If there was a handover, this context no 3866 * longer holds the console_lock. 3867 */ 3868 if (handover) 3869 console_lock(); 3870 3871 init_seq = prb_next_seq(prb); 3872 for_each_console(con) { 3873 u64 seq; 3874 3875 if (!(con->flags & CON_BOOT) || 3876 !(con->flags & CON_ENABLED)) { 3877 continue; 3878 } 3879 3880 if (con->flags & CON_NBCON) 3881 seq = nbcon_seq_read(con); 3882 else 3883 seq = con->seq; 3884 3885 if (seq < init_seq) 3886 init_seq = seq; 3887 } 3888 } 3889 3890 console_unlock(); 3891 } 3892 } 3893 3894 return init_seq; 3895 } 3896 3897 #define console_first() \ 3898 hlist_entry(console_list.first, struct console, node) 3899 3900 static int unregister_console_locked(struct console *console); 3901 3902 /* 3903 * The console driver calls this routine during kernel initialization 3904 * to register the console printing procedure with printk() and to 3905 * print any messages that were printed by the kernel before the 3906 * console driver was initialized. 
3907 * 3908 * This can happen pretty early during the boot process (because of 3909 * early_printk) - sometimes before setup_arch() completes - be careful 3910 * of what kernel features are used - they may not be initialised yet. 3911 * 3912 * There are two types of consoles - bootconsoles (early_printk) and 3913 * "real" consoles (everything which is not a bootconsole) which are 3914 * handled differently. 3915 * - Any number of bootconsoles can be registered at any time. 3916 * - As soon as a "real" console is registered, all bootconsoles 3917 * will be unregistered automatically. 3918 * - Once a "real" console is registered, any attempt to register a 3919 * bootconsoles will be rejected 3920 */ 3921 void register_console(struct console *newcon) 3922 { 3923 bool use_device_lock = (newcon->flags & CON_NBCON) && newcon->write_atomic; 3924 bool bootcon_registered = false; 3925 bool realcon_registered = false; 3926 struct console *con; 3927 unsigned long flags; 3928 u64 init_seq; 3929 int err; 3930 3931 console_list_lock(); 3932 3933 for_each_console(con) { 3934 if (WARN(con == newcon, "console '%s%d' already registered\n", 3935 con->name, con->index)) { 3936 goto unlock; 3937 } 3938 3939 if (con->flags & CON_BOOT) 3940 bootcon_registered = true; 3941 else 3942 realcon_registered = true; 3943 } 3944 3945 /* Do not register boot consoles when there already is a real one. */ 3946 if ((newcon->flags & CON_BOOT) && realcon_registered) { 3947 pr_info("Too late to register bootconsole %s%d\n", 3948 newcon->name, newcon->index); 3949 goto unlock; 3950 } 3951 3952 if (newcon->flags & CON_NBCON) { 3953 /* 3954 * Ensure the nbcon console buffers can be allocated 3955 * before modifying any global data. 3956 */ 3957 if (!nbcon_alloc(newcon)) 3958 goto unlock; 3959 } 3960 3961 /* 3962 * See if we want to enable this console driver by default. 3963 * 3964 * Nope when a console is preferred by the command line, device 3965 * tree, or SPCR. 
3966 * 3967 * The first real console with tty binding (driver) wins. More 3968 * consoles might get enabled before the right one is found. 3969 * 3970 * Note that a console with tty binding will have CON_CONSDEV 3971 * flag set and will be first in the list. 3972 */ 3973 if (preferred_console < 0) { 3974 if (hlist_empty(&console_list) || !console_first()->device || 3975 console_first()->flags & CON_BOOT) { 3976 try_enable_default_console(newcon); 3977 } 3978 } 3979 3980 /* See if this console matches one we selected on the command line */ 3981 err = try_enable_preferred_console(newcon, true); 3982 3983 /* If not, try to match against the platform default(s) */ 3984 if (err == -ENOENT) 3985 err = try_enable_preferred_console(newcon, false); 3986 3987 /* printk() messages are not printed to the Braille console. */ 3988 if (err || newcon->flags & CON_BRL) { 3989 if (newcon->flags & CON_NBCON) 3990 nbcon_free(newcon); 3991 goto unlock; 3992 } 3993 3994 /* 3995 * If we have a bootconsole, and are switching to a real console, 3996 * don't print everything out again, since when the boot console, and 3997 * the real console are the same physical device, it's annoying to 3998 * see the beginning boot messages twice 3999 */ 4000 if (bootcon_registered && 4001 ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { 4002 newcon->flags &= ~CON_PRINTBUFFER; 4003 } 4004 4005 newcon->dropped = 0; 4006 init_seq = get_init_console_seq(newcon, bootcon_registered); 4007 4008 if (newcon->flags & CON_NBCON) { 4009 have_nbcon_console = true; 4010 nbcon_seq_force(newcon, init_seq); 4011 } else { 4012 have_legacy_console = true; 4013 newcon->seq = init_seq; 4014 } 4015 4016 if (newcon->flags & CON_BOOT) 4017 have_boot_console = true; 4018 4019 /* 4020 * If another context is actively using the hardware of this new 4021 * console, it will not be aware of the nbcon synchronization. 
This 4022 * is a risk that two contexts could access the hardware 4023 * simultaneously if this new console is used for atomic printing 4024 * and the other context is still using the hardware. 4025 * 4026 * Use the driver synchronization to ensure that the hardware is not 4027 * in use while this new console transitions to being registered. 4028 */ 4029 if (use_device_lock) 4030 newcon->device_lock(newcon, &flags); 4031 4032 /* 4033 * Put this console in the list - keep the 4034 * preferred driver at the head of the list. 4035 */ 4036 if (hlist_empty(&console_list)) { 4037 /* Ensure CON_CONSDEV is always set for the head. */ 4038 newcon->flags |= CON_CONSDEV; 4039 hlist_add_head_rcu(&newcon->node, &console_list); 4040 4041 } else if (newcon->flags & CON_CONSDEV) { 4042 /* Only the new head can have CON_CONSDEV set. */ 4043 console_srcu_write_flags(console_first(), console_first()->flags & ~CON_CONSDEV); 4044 hlist_add_head_rcu(&newcon->node, &console_list); 4045 4046 } else { 4047 hlist_add_behind_rcu(&newcon->node, console_list.first); 4048 } 4049 4050 /* 4051 * No need to synchronize SRCU here! The caller does not rely 4052 * on all contexts being able to see the new console before 4053 * register_console() completes. 4054 */ 4055 4056 /* This new console is now registered. 
*/ 4057 if (use_device_lock) 4058 newcon->device_unlock(newcon, flags); 4059 4060 console_sysfs_notify(); 4061 4062 /* 4063 * By unregistering the bootconsoles after we enable the real console 4064 * we get the "console xxx enabled" message on all the consoles - 4065 * boot consoles, real consoles, etc - this is to ensure that end 4066 * users know there might be something in the kernel's log buffer that 4067 * went to the bootconsole (that they do not see on the real console) 4068 */ 4069 con_printk(KERN_INFO, newcon, "enabled\n"); 4070 if (bootcon_registered && 4071 ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && 4072 !keep_bootcon) { 4073 struct hlist_node *tmp; 4074 4075 hlist_for_each_entry_safe(con, tmp, &console_list, node) { 4076 if (con->flags & CON_BOOT) 4077 unregister_console_locked(con); 4078 } 4079 } 4080 4081 /* Changed console list, may require printer threads to start/stop. */ 4082 printk_kthreads_check_locked(); 4083 unlock: 4084 console_list_unlock(); 4085 } 4086 EXPORT_SYMBOL(register_console); 4087 4088 /* Must be called under console_list_lock(). 
*/ 4089 static int unregister_console_locked(struct console *console) 4090 { 4091 bool use_device_lock = (console->flags & CON_NBCON) && console->write_atomic; 4092 bool found_legacy_con = false; 4093 bool found_nbcon_con = false; 4094 bool found_boot_con = false; 4095 unsigned long flags; 4096 struct console *c; 4097 int res; 4098 4099 lockdep_assert_console_list_lock_held(); 4100 4101 con_printk(KERN_INFO, console, "disabled\n"); 4102 4103 res = _braille_unregister_console(console); 4104 if (res < 0) 4105 return res; 4106 if (res > 0) 4107 return 0; 4108 4109 if (!console_is_registered_locked(console)) 4110 res = -ENODEV; 4111 else if (console_is_usable(console, console->flags, true)) 4112 __pr_flush(console, 1000, true); 4113 4114 /* Disable it unconditionally */ 4115 console_srcu_write_flags(console, console->flags & ~CON_ENABLED); 4116 4117 if (res < 0) 4118 return res; 4119 4120 /* 4121 * Use the driver synchronization to ensure that the hardware is not 4122 * in use while this console transitions to being unregistered. 4123 */ 4124 if (use_device_lock) 4125 console->device_lock(console, &flags); 4126 4127 hlist_del_init_rcu(&console->node); 4128 4129 if (use_device_lock) 4130 console->device_unlock(console, flags); 4131 4132 /* 4133 * <HISTORICAL> 4134 * If this isn't the last console and it has CON_CONSDEV set, we 4135 * need to set it on the next preferred console. 4136 * </HISTORICAL> 4137 * 4138 * The above makes no sense as there is no guarantee that the next 4139 * console has any device attached. Oh well.... 4140 */ 4141 if (!hlist_empty(&console_list) && console->flags & CON_CONSDEV) 4142 console_srcu_write_flags(console_first(), console_first()->flags | CON_CONSDEV); 4143 4144 /* 4145 * Ensure that all SRCU list walks have completed. All contexts 4146 * must not be able to see this console in the list so that any 4147 * exit/cleanup routines can be performed safely. 
4148 */ 4149 synchronize_srcu(&console_srcu); 4150 4151 if (console->flags & CON_NBCON) 4152 nbcon_free(console); 4153 4154 console_sysfs_notify(); 4155 4156 if (console->exit) 4157 res = console->exit(console); 4158 4159 /* 4160 * With this console gone, the global flags tracking registered 4161 * console types may have changed. Update them. 4162 */ 4163 for_each_console(c) { 4164 if (c->flags & CON_BOOT) 4165 found_boot_con = true; 4166 4167 if (c->flags & CON_NBCON) 4168 found_nbcon_con = true; 4169 else 4170 found_legacy_con = true; 4171 } 4172 if (!found_boot_con) 4173 have_boot_console = found_boot_con; 4174 if (!found_legacy_con) 4175 have_legacy_console = found_legacy_con; 4176 if (!found_nbcon_con) 4177 have_nbcon_console = found_nbcon_con; 4178 4179 /* Changed console list, may require printer threads to start/stop. */ 4180 printk_kthreads_check_locked(); 4181 4182 return res; 4183 } 4184 4185 int unregister_console(struct console *console) 4186 { 4187 int res; 4188 4189 console_list_lock(); 4190 res = unregister_console_locked(console); 4191 console_list_unlock(); 4192 return res; 4193 } 4194 EXPORT_SYMBOL(unregister_console); 4195 4196 /** 4197 * console_force_preferred_locked - force a registered console preferred 4198 * @con: The registered console to force preferred. 4199 * 4200 * Must be called under console_list_lock(). 4201 */ 4202 void console_force_preferred_locked(struct console *con) 4203 { 4204 struct console *cur_pref_con; 4205 4206 if (!console_is_registered_locked(con)) 4207 return; 4208 4209 cur_pref_con = console_first(); 4210 4211 /* Already preferred? */ 4212 if (cur_pref_con == con) 4213 return; 4214 4215 /* 4216 * Delete, but do not re-initialize the entry. This allows the console 4217 * to continue to appear registered (via any hlist_unhashed_lockless() 4218 * checks), even though it was briefly removed from the console list. 
4219 */ 4220 hlist_del_rcu(&con->node); 4221 4222 /* 4223 * Ensure that all SRCU list walks have completed so that the console 4224 * can be added to the beginning of the console list and its forward 4225 * list pointer can be re-initialized. 4226 */ 4227 synchronize_srcu(&console_srcu); 4228 4229 con->flags |= CON_CONSDEV; 4230 WARN_ON(!con->device); 4231 4232 /* Only the new head can have CON_CONSDEV set. */ 4233 console_srcu_write_flags(cur_pref_con, cur_pref_con->flags & ~CON_CONSDEV); 4234 hlist_add_head_rcu(&con->node, &console_list); 4235 } 4236 EXPORT_SYMBOL(console_force_preferred_locked); 4237 4238 /* 4239 * Initialize the console device. This is called *early*, so 4240 * we can't necessarily depend on lots of kernel help here. 4241 * Just do some early initializations, and do the complex setup 4242 * later. 4243 */ 4244 void __init console_init(void) 4245 { 4246 int ret; 4247 initcall_t call; 4248 initcall_entry_t *ce; 4249 4250 /* Setup the default TTY line discipline. */ 4251 n_tty_init(); 4252 4253 /* 4254 * set up the console device so that later boot sequences can 4255 * inform about problems etc.. 4256 */ 4257 ce = __con_initcall_start; 4258 trace_initcall_level("console"); 4259 while (ce < __con_initcall_end) { 4260 call = initcall_from_entry(ce); 4261 trace_initcall_start(call); 4262 ret = call(); 4263 trace_initcall_finish(call, ret); 4264 ce++; 4265 } 4266 } 4267 4268 /* 4269 * Some boot consoles access data that is in the init section and which will 4270 * be discarded after the initcalls have been run. To make sure that no code 4271 * will access this data, unregister the boot consoles in a late initcall. 4272 * 4273 * If for some reason, such as deferred probe or the driver being a loadable 4274 * module, the real console hasn't registered yet at this point, there will 4275 * be a brief interval in which no messages are logged to the console, which 4276 * makes it difficult to diagnose problems that occur during this time. 
4277 * 4278 * To mitigate this problem somewhat, only unregister consoles whose memory 4279 * intersects with the init section. Note that all other boot consoles will 4280 * get unregistered when the real preferred console is registered. 4281 */ 4282 static int __init printk_late_init(void) 4283 { 4284 struct hlist_node *tmp; 4285 struct console *con; 4286 int ret; 4287 4288 console_list_lock(); 4289 hlist_for_each_entry_safe(con, tmp, &console_list, node) { 4290 if (!(con->flags & CON_BOOT)) 4291 continue; 4292 4293 /* Check addresses that might be used for enabled consoles. */ 4294 if (init_section_intersects(con, sizeof(*con)) || 4295 init_section_contains(con->write, 0) || 4296 init_section_contains(con->read, 0) || 4297 init_section_contains(con->device, 0) || 4298 init_section_contains(con->unblank, 0) || 4299 init_section_contains(con->data, 0)) { 4300 /* 4301 * Please, consider moving the reported consoles out 4302 * of the init section. 4303 */ 4304 pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n", 4305 con->name, con->index); 4306 unregister_console_locked(con); 4307 } 4308 } 4309 console_list_unlock(); 4310 4311 ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, 4312 console_cpu_notify); 4313 WARN_ON(ret < 0); 4314 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", 4315 console_cpu_notify, NULL); 4316 WARN_ON(ret < 0); 4317 printk_sysctl_init(); 4318 return 0; 4319 } 4320 late_initcall(printk_late_init); 4321 4322 #if defined CONFIG_PRINTK 4323 /* If @con is specified, only wait for that console. Otherwise wait for all. 
*/
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress)
{
	unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms);
	unsigned long remaining_jiffies = timeout_jiffies;
	struct console_flush_type ft;
	struct console *cur;
	u64 last_diff = 0;
	u64 printk_seq;
	short flags;
	int cookie;
	u64 diff;
	u64 seq;

	/* Sorry, pr_flush() will not work this early. */
	if (system_state < SYSTEM_SCHEDULING)
		return false;

	might_sleep();

	seq = prb_next_reserve_seq(prb);

	/* Flush the consoles so that records up to @seq are printed. */
	printk_get_console_flush_type(&ft);
	if (ft.nbcon_atomic)
		nbcon_atomic_flush_pending();
	if (ft.legacy_direct) {
		console_lock();
		console_unlock();
	}

	for (;;) {
		unsigned long begin_jiffies;
		unsigned long slept_jiffies;

		diff = 0;

		/*
		 * Hold the console_lock to guarantee safe access to
		 * console->seq. Releasing console_lock flushes more
		 * records in case @seq is still not printed on all
		 * usable consoles.
		 *
		 * Holding the console_lock is not necessary if there
		 * are no legacy or boot consoles. However, such a
		 * console could register at any time. Always hold the
		 * console_lock as a precaution rather than
		 * synchronizing against register_console().
		 */
		console_lock();

		cookie = console_srcu_read_lock();
		for_each_console_srcu(cur) {
			if (con && con != cur)
				continue;

			flags = console_srcu_read_flags(cur);

			/*
			 * If consoles are not usable, it cannot be expected
			 * that they make forward progress, so only increment
			 * @diff for usable consoles.
			 */
			if (!(console_is_usable(cur, flags, true) ||
			      console_is_usable(cur, flags, false))) {
				continue;
			}

			printk_seq = (flags & CON_NBCON) ? nbcon_seq_read(cur)
							 : cur->seq;

			/* Accumulate how far this console lags behind @seq. */
			if (printk_seq < seq)
				diff += seq - printk_seq;
		}
		console_srcu_read_unlock(cookie);

		/* A changed backlog restarts the timeout when requested. */
		if (diff != last_diff && reset_on_progress)
			remaining_jiffies = timeout_jiffies;

		console_unlock();

		/* Note: @diff is 0 if there are no usable consoles. */
		if (diff == 0 || remaining_jiffies == 0)
			break;

		/* msleep(1) might sleep much longer. Check time by jiffies. */
		begin_jiffies = jiffies;
		msleep(1);
		slept_jiffies = jiffies - begin_jiffies;

		remaining_jiffies -= min(slept_jiffies, remaining_jiffies);

		last_diff = diff;
	}

	return (diff == 0);
}

/**
 * pr_flush() - Wait for printing threads to catch up.
 *
 * @timeout_ms:        The maximum time (in ms) to wait.
 * @reset_on_progress: Reset the timeout if forward progress is seen.
 *
 * A value of 0 for @timeout_ms means no waiting will occur. A value of -1
 * represents infinite waiting.
 *
 * If @reset_on_progress is true, the timeout will be reset whenever any
 * printer has been seen to make some forward progress.
 *
 * Context: Process context. May sleep while acquiring console lock.
 * Return: true if all usable printers are caught up.
4438 */ 4439 static bool pr_flush(int timeout_ms, bool reset_on_progress) 4440 { 4441 return __pr_flush(NULL, timeout_ms, reset_on_progress); 4442 } 4443 4444 /* 4445 * Delayed printk version, for scheduler-internal messages: 4446 */ 4447 #define PRINTK_PENDING_WAKEUP 0x01 4448 #define PRINTK_PENDING_OUTPUT 0x02 4449 4450 static DEFINE_PER_CPU(int, printk_pending); 4451 4452 static void wake_up_klogd_work_func(struct irq_work *irq_work) 4453 { 4454 int pending = this_cpu_xchg(printk_pending, 0); 4455 4456 if (pending & PRINTK_PENDING_OUTPUT) { 4457 if (force_legacy_kthread()) { 4458 if (printk_legacy_kthread) 4459 wake_up_interruptible(&legacy_wait); 4460 } else { 4461 if (console_trylock()) 4462 console_unlock(); 4463 } 4464 } 4465 4466 if (pending & PRINTK_PENDING_WAKEUP) 4467 wake_up_interruptible(&log_wait); 4468 } 4469 4470 static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = 4471 IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func); 4472 4473 static void __wake_up_klogd(int val) 4474 { 4475 if (!printk_percpu_data_ready()) 4476 return; 4477 4478 preempt_disable(); 4479 /* 4480 * Guarantee any new records can be seen by tasks preparing to wait 4481 * before this context checks if the wait queue is empty. 4482 * 4483 * The full memory barrier within wq_has_sleeper() pairs with the full 4484 * memory barrier within set_current_state() of 4485 * prepare_to_wait_event(), which is called after ___wait_event() adds 4486 * the waiter but before it has checked the wait condition. 4487 * 4488 * This pairs with devkmsg_read:A and syslog_print:A. 
4489 */ 4490 if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ 4491 (val & PRINTK_PENDING_OUTPUT)) { 4492 this_cpu_or(printk_pending, val); 4493 irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); 4494 } 4495 preempt_enable(); 4496 } 4497 4498 /** 4499 * wake_up_klogd - Wake kernel logging daemon 4500 * 4501 * Use this function when new records have been added to the ringbuffer 4502 * and the console printing of those records has already occurred or is 4503 * known to be handled by some other context. This function will only 4504 * wake the logging daemon. 4505 * 4506 * Context: Any context. 4507 */ 4508 void wake_up_klogd(void) 4509 { 4510 __wake_up_klogd(PRINTK_PENDING_WAKEUP); 4511 } 4512 4513 /** 4514 * defer_console_output - Wake kernel logging daemon and trigger 4515 * console printing in a deferred context 4516 * 4517 * Use this function when new records have been added to the ringbuffer, 4518 * this context is responsible for console printing those records, but 4519 * the current context is not allowed to perform the console printing. 4520 * Trigger an irq_work context to perform the console printing. This 4521 * function also wakes the logging daemon. 4522 * 4523 * Context: Any context. 4524 */ 4525 void defer_console_output(void) 4526 { 4527 /* 4528 * New messages may have been added directly to the ringbuffer 4529 * using vprintk_store(), so wake any waiters as well. 4530 */ 4531 __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); 4532 } 4533 4534 void printk_trigger_flush(void) 4535 { 4536 defer_console_output(); 4537 } 4538 4539 int vprintk_deferred(const char *fmt, va_list args) 4540 { 4541 return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); 4542 } 4543 4544 int _printk_deferred(const char *fmt, ...) 
4545 { 4546 va_list args; 4547 int r; 4548 4549 va_start(args, fmt); 4550 r = vprintk_deferred(fmt, args); 4551 va_end(args); 4552 4553 return r; 4554 } 4555 4556 /* 4557 * printk rate limiting, lifted from the networking subsystem. 4558 * 4559 * This enforces a rate limit: not more than 10 kernel messages 4560 * every 5s to make a denial-of-service attack impossible. 4561 */ 4562 DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); 4563 4564 int __printk_ratelimit(const char *func) 4565 { 4566 return ___ratelimit(&printk_ratelimit_state, func); 4567 } 4568 EXPORT_SYMBOL(__printk_ratelimit); 4569 4570 /** 4571 * printk_timed_ratelimit - caller-controlled printk ratelimiting 4572 * @caller_jiffies: pointer to caller's state 4573 * @interval_msecs: minimum interval between prints 4574 * 4575 * printk_timed_ratelimit() returns true if more than @interval_msecs 4576 * milliseconds have elapsed since the last time printk_timed_ratelimit() 4577 * returned true. 4578 */ 4579 bool printk_timed_ratelimit(unsigned long *caller_jiffies, 4580 unsigned int interval_msecs) 4581 { 4582 unsigned long elapsed = jiffies - *caller_jiffies; 4583 4584 if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs)) 4585 return false; 4586 4587 *caller_jiffies = jiffies; 4588 return true; 4589 } 4590 EXPORT_SYMBOL(printk_timed_ratelimit); 4591 4592 static DEFINE_SPINLOCK(dump_list_lock); 4593 static LIST_HEAD(dump_list); 4594 4595 /** 4596 * kmsg_dump_register - register a kernel log dumper. 4597 * @dumper: pointer to the kmsg_dumper structure 4598 * 4599 * Adds a kernel log dumper to the system. The dump callback in the 4600 * structure will be called when the kernel oopses or panics and must be 4601 * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise. 
4602 */ 4603 int kmsg_dump_register(struct kmsg_dumper *dumper) 4604 { 4605 unsigned long flags; 4606 int err = -EBUSY; 4607 4608 /* The dump callback needs to be set */ 4609 if (!dumper->dump) 4610 return -EINVAL; 4611 4612 spin_lock_irqsave(&dump_list_lock, flags); 4613 /* Don't allow registering multiple times */ 4614 if (!dumper->registered) { 4615 dumper->registered = 1; 4616 list_add_tail_rcu(&dumper->list, &dump_list); 4617 err = 0; 4618 } 4619 spin_unlock_irqrestore(&dump_list_lock, flags); 4620 4621 return err; 4622 } 4623 EXPORT_SYMBOL_GPL(kmsg_dump_register); 4624 4625 /** 4626 * kmsg_dump_unregister - unregister a kmsg dumper. 4627 * @dumper: pointer to the kmsg_dumper structure 4628 * 4629 * Removes a dump device from the system. Returns zero on success and 4630 * %-EINVAL otherwise. 4631 */ 4632 int kmsg_dump_unregister(struct kmsg_dumper *dumper) 4633 { 4634 unsigned long flags; 4635 int err = -EINVAL; 4636 4637 spin_lock_irqsave(&dump_list_lock, flags); 4638 if (dumper->registered) { 4639 dumper->registered = 0; 4640 list_del_rcu(&dumper->list); 4641 err = 0; 4642 } 4643 spin_unlock_irqrestore(&dump_list_lock, flags); 4644 synchronize_rcu(); 4645 4646 return err; 4647 } 4648 EXPORT_SYMBOL_GPL(kmsg_dump_unregister); 4649 4650 static bool always_kmsg_dump; 4651 module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); 4652 4653 const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) 4654 { 4655 switch (reason) { 4656 case KMSG_DUMP_PANIC: 4657 return "Panic"; 4658 case KMSG_DUMP_OOPS: 4659 return "Oops"; 4660 case KMSG_DUMP_EMERG: 4661 return "Emergency"; 4662 case KMSG_DUMP_SHUTDOWN: 4663 return "Shutdown"; 4664 default: 4665 return "Unknown"; 4666 } 4667 } 4668 EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); 4669 4670 /** 4671 * kmsg_dump - dump kernel log to kernel message dumpers. 
4672 * @reason: the reason (oops, panic etc) for dumping 4673 * 4674 * Call each of the registered dumper's dump() callback, which can 4675 * retrieve the kmsg records with kmsg_dump_get_line() or 4676 * kmsg_dump_get_buffer(). 4677 */ 4678 void kmsg_dump(enum kmsg_dump_reason reason) 4679 { 4680 struct kmsg_dumper *dumper; 4681 4682 rcu_read_lock(); 4683 list_for_each_entry_rcu(dumper, &dump_list, list) { 4684 enum kmsg_dump_reason max_reason = dumper->max_reason; 4685 4686 /* 4687 * If client has not provided a specific max_reason, default 4688 * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set. 4689 */ 4690 if (max_reason == KMSG_DUMP_UNDEF) { 4691 max_reason = always_kmsg_dump ? KMSG_DUMP_MAX : 4692 KMSG_DUMP_OOPS; 4693 } 4694 if (reason > max_reason) 4695 continue; 4696 4697 /* invoke dumper which will iterate over records */ 4698 dumper->dump(dumper, reason); 4699 } 4700 rcu_read_unlock(); 4701 } 4702 4703 /** 4704 * kmsg_dump_get_line - retrieve one kmsg log line 4705 * @iter: kmsg dump iterator 4706 * @syslog: include the "<4>" prefixes 4707 * @line: buffer to copy the line to 4708 * @size: maximum size of the buffer 4709 * @len: length of line placed into buffer 4710 * 4711 * Start at the beginning of the kmsg buffer, with the oldest kmsg 4712 * record, and copy one record into the provided buffer. 4713 * 4714 * Consecutive calls will return the next available record moving 4715 * towards the end of the buffer with the youngest messages. 4716 * 4717 * A return value of FALSE indicates that there are no more records to 4718 * read. 
4719 */ 4720 bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, 4721 char *line, size_t size, size_t *len) 4722 { 4723 u64 min_seq = latched_seq_read_nolock(&clear_seq); 4724 struct printk_info info; 4725 unsigned int line_count; 4726 struct printk_record r; 4727 size_t l = 0; 4728 bool ret = false; 4729 4730 if (iter->cur_seq < min_seq) 4731 iter->cur_seq = min_seq; 4732 4733 prb_rec_init_rd(&r, &info, line, size); 4734 4735 /* Read text or count text lines? */ 4736 if (line) { 4737 if (!prb_read_valid(prb, iter->cur_seq, &r)) 4738 goto out; 4739 l = record_print_text(&r, syslog, printk_time); 4740 } else { 4741 if (!prb_read_valid_info(prb, iter->cur_seq, 4742 &info, &line_count)) { 4743 goto out; 4744 } 4745 l = get_record_print_text_size(&info, line_count, syslog, 4746 printk_time); 4747 4748 } 4749 4750 iter->cur_seq = r.info->seq + 1; 4751 ret = true; 4752 out: 4753 if (len) 4754 *len = l; 4755 return ret; 4756 } 4757 EXPORT_SYMBOL_GPL(kmsg_dump_get_line); 4758 4759 /** 4760 * kmsg_dump_get_buffer - copy kmsg log lines 4761 * @iter: kmsg dump iterator 4762 * @syslog: include the "<4>" prefixes 4763 * @buf: buffer to copy the line to 4764 * @size: maximum size of the buffer 4765 * @len_out: length of line placed into buffer 4766 * 4767 * Start at the end of the kmsg buffer and fill the provided buffer 4768 * with as many of the *youngest* kmsg records that fit into it. 4769 * If the buffer is large enough, all available kmsg records will be 4770 * copied with a single call. 4771 * 4772 * Consecutive calls will fill the buffer with the next block of 4773 * available older records, not including the earlier retrieved ones. 4774 * 4775 * A return value of FALSE indicates that there are no more records to 4776 * read. 
4777 */ 4778 bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, 4779 char *buf, size_t size, size_t *len_out) 4780 { 4781 u64 min_seq = latched_seq_read_nolock(&clear_seq); 4782 struct printk_info info; 4783 struct printk_record r; 4784 u64 seq; 4785 u64 next_seq; 4786 size_t len = 0; 4787 bool ret = false; 4788 bool time = printk_time; 4789 4790 if (!buf || !size) 4791 goto out; 4792 4793 if (iter->cur_seq < min_seq) 4794 iter->cur_seq = min_seq; 4795 4796 if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { 4797 if (info.seq != iter->cur_seq) { 4798 /* messages are gone, move to first available one */ 4799 iter->cur_seq = info.seq; 4800 } 4801 } 4802 4803 /* last entry */ 4804 if (iter->cur_seq >= iter->next_seq) 4805 goto out; 4806 4807 /* 4808 * Find first record that fits, including all following records, 4809 * into the user-provided buffer for this dump. Pass in size-1 4810 * because this function (by way of record_print_text()) will 4811 * not write more than size-1 bytes of text into @buf. 4812 */ 4813 seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, 4814 size - 1, syslog, time); 4815 4816 /* 4817 * Next kmsg_dump_get_buffer() invocation will dump block of 4818 * older records stored right before this one. 4819 */ 4820 next_seq = seq; 4821 4822 prb_rec_init_rd(&r, &info, buf, size); 4823 4824 prb_for_each_record(seq, prb, seq, &r) { 4825 if (r.info->seq >= iter->next_seq) 4826 break; 4827 4828 len += record_print_text(&r, syslog, time); 4829 4830 /* Adjust record to store to remaining buffer space. 
*/ 4831 prb_rec_init_rd(&r, &info, buf + len, size - len); 4832 } 4833 4834 iter->next_seq = next_seq; 4835 ret = true; 4836 out: 4837 if (len_out) 4838 *len_out = len; 4839 return ret; 4840 } 4841 EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); 4842 4843 /** 4844 * kmsg_dump_rewind - reset the iterator 4845 * @iter: kmsg dump iterator 4846 * 4847 * Reset the dumper's iterator so that kmsg_dump_get_line() and 4848 * kmsg_dump_get_buffer() can be called again and used multiple 4849 * times within the same dumper.dump() callback. 4850 */ 4851 void kmsg_dump_rewind(struct kmsg_dump_iter *iter) 4852 { 4853 iter->cur_seq = latched_seq_read_nolock(&clear_seq); 4854 iter->next_seq = prb_next_seq(prb); 4855 } 4856 EXPORT_SYMBOL_GPL(kmsg_dump_rewind); 4857 4858 /** 4859 * console_try_replay_all - try to replay kernel log on consoles 4860 * 4861 * Try to obtain lock on console subsystem and replay all 4862 * available records in printk buffer on the consoles. 4863 * Does nothing if lock is not obtained. 4864 * 4865 * Context: Any, except for NMI. 4866 */ 4867 void console_try_replay_all(void) 4868 { 4869 struct console_flush_type ft; 4870 4871 printk_get_console_flush_type(&ft); 4872 if (console_trylock()) { 4873 __console_rewind_all(); 4874 if (ft.nbcon_atomic) 4875 nbcon_atomic_flush_pending(); 4876 if (ft.nbcon_offload) 4877 nbcon_kthreads_wake(); 4878 if (ft.legacy_offload) 4879 defer_console_output(); 4880 /* Consoles are flushed as part of console_unlock(). */ 4881 console_unlock(); 4882 } 4883 } 4884 #endif 4885 4886 #ifdef CONFIG_SMP 4887 static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1); 4888 static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0); 4889 4890 /** 4891 * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant 4892 * spinning lock is not owned by any CPU. 4893 * 4894 * Context: Any context. 
4895 */ 4896 void __printk_cpu_sync_wait(void) 4897 { 4898 do { 4899 cpu_relax(); 4900 } while (atomic_read(&printk_cpu_sync_owner) != -1); 4901 } 4902 EXPORT_SYMBOL(__printk_cpu_sync_wait); 4903 4904 /** 4905 * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant 4906 * spinning lock. 4907 * 4908 * If no processor has the lock, the calling processor takes the lock and 4909 * becomes the owner. If the calling processor is already the owner of the 4910 * lock, this function succeeds immediately. 4911 * 4912 * Context: Any context. Expects interrupts to be disabled. 4913 * Return: 1 on success, otherwise 0. 4914 */ 4915 int __printk_cpu_sync_try_get(void) 4916 { 4917 int cpu; 4918 int old; 4919 4920 cpu = smp_processor_id(); 4921 4922 /* 4923 * Guarantee loads and stores from this CPU when it is the lock owner 4924 * are _not_ visible to the previous lock owner. This pairs with 4925 * __printk_cpu_sync_put:B. 4926 * 4927 * Memory barrier involvement: 4928 * 4929 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, 4930 * then __printk_cpu_sync_put:A can never read from 4931 * __printk_cpu_sync_try_get:B. 4932 * 4933 * Relies on: 4934 * 4935 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B 4936 * of the previous CPU 4937 * matching 4938 * ACQUIRE from __printk_cpu_sync_try_get:A to 4939 * __printk_cpu_sync_try_get:B of this CPU 4940 */ 4941 old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1, 4942 cpu); /* LMM(__printk_cpu_sync_try_get:A) */ 4943 if (old == -1) { 4944 /* 4945 * This CPU is now the owner and begins loading/storing 4946 * data: LMM(__printk_cpu_sync_try_get:B) 4947 */ 4948 return 1; 4949 4950 } else if (old == cpu) { 4951 /* This CPU is already the owner. */ 4952 atomic_inc(&printk_cpu_sync_nested); 4953 return 1; 4954 } 4955 4956 return 0; 4957 } 4958 EXPORT_SYMBOL(__printk_cpu_sync_try_get); 4959 4960 /** 4961 * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock. 
4962 * 4963 * The calling processor must be the owner of the lock. 4964 * 4965 * Context: Any context. Expects interrupts to be disabled. 4966 */ 4967 void __printk_cpu_sync_put(void) 4968 { 4969 if (atomic_read(&printk_cpu_sync_nested)) { 4970 atomic_dec(&printk_cpu_sync_nested); 4971 return; 4972 } 4973 4974 /* 4975 * This CPU is finished loading/storing data: 4976 * LMM(__printk_cpu_sync_put:A) 4977 */ 4978 4979 /* 4980 * Guarantee loads and stores from this CPU when it was the 4981 * lock owner are visible to the next lock owner. This pairs 4982 * with __printk_cpu_sync_try_get:A. 4983 * 4984 * Memory barrier involvement: 4985 * 4986 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, 4987 * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A. 4988 * 4989 * Relies on: 4990 * 4991 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B 4992 * of this CPU 4993 * matching 4994 * ACQUIRE from __printk_cpu_sync_try_get:A to 4995 * __printk_cpu_sync_try_get:B of the next CPU 4996 */ 4997 atomic_set_release(&printk_cpu_sync_owner, 4998 -1); /* LMM(__printk_cpu_sync_put:B) */ 4999 } 5000 EXPORT_SYMBOL(__printk_cpu_sync_put); 5001 #endif /* CONFIG_SMP */ 5002