1 // SPDX-License-Identifier: GPL-2.0 2 3 //! String representations. 4 5 use crate::{ 6 alloc::{flags::*, AllocError, KVec}, 7 error::{to_result, Result}, 8 fmt::{self, Write}, 9 prelude::*, 10 }; 11 use core::{ 12 marker::PhantomData, 13 ops::{self, Deref, DerefMut, Index}, 14 }; 15 16 pub mod parse_int; 17 18 /// Byte string without UTF-8 validity guarantee. 19 #[repr(transparent)] 20 pub struct BStr([u8]); 21 22 impl BStr { 23 /// Returns the length of this string. 24 #[inline] 25 pub const fn len(&self) -> usize { 26 self.0.len() 27 } 28 29 /// Returns `true` if the string is empty. 30 #[inline] 31 pub const fn is_empty(&self) -> bool { 32 self.len() == 0 33 } 34 35 /// Creates a [`BStr`] from a `[u8]`. 36 #[inline] 37 pub const fn from_bytes(bytes: &[u8]) -> &Self { 38 // SAFETY: `BStr` is transparent to `[u8]`. 39 unsafe { &*(core::ptr::from_ref(bytes) as *const BStr) } 40 } 41 42 /// Strip a prefix from `self`. Delegates to [`slice::strip_prefix`]. 43 /// 44 /// # Examples 45 /// 46 /// ``` 47 /// # use kernel::b_str; 48 /// assert_eq!(Some(b_str!("bar")), b_str!("foobar").strip_prefix(b_str!("foo"))); 49 /// assert_eq!(None, b_str!("foobar").strip_prefix(b_str!("bar"))); 50 /// assert_eq!(Some(b_str!("foobar")), b_str!("foobar").strip_prefix(b_str!(""))); 51 /// assert_eq!(Some(b_str!("")), b_str!("foobar").strip_prefix(b_str!("foobar"))); 52 /// ``` 53 pub fn strip_prefix(&self, pattern: impl AsRef<Self>) -> Option<&BStr> { 54 self.deref() 55 .strip_prefix(pattern.as_ref().deref()) 56 .map(Self::from_bytes) 57 } 58 } 59 60 impl fmt::Display for BStr { 61 /// Formats printable ASCII characters, escaping the rest. 
62 /// 63 /// ``` 64 /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; 65 /// let ascii = b_str!("Hello, BStr!"); 66 /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; 67 /// assert_eq!(s.to_bytes(), "Hello, BStr!".as_bytes()); 68 /// 69 /// let non_ascii = b_str!(""); 70 /// let s = CString::try_from_fmt(fmt!("{non_ascii}"))?; 71 /// assert_eq!(s.to_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); 72 /// # Ok::<(), kernel::error::Error>(()) 73 /// ``` 74 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 75 for &b in &self.0 { 76 match b { 77 // Common escape codes. 78 b'\t' => f.write_str("\\t")?, 79 b'\n' => f.write_str("\\n")?, 80 b'\r' => f.write_str("\\r")?, 81 // Printable characters. 82 0x20..=0x7e => f.write_char(b as char)?, 83 _ => write!(f, "\\x{b:02x}")?, 84 } 85 } 86 Ok(()) 87 } 88 } 89 90 impl fmt::Debug for BStr { 91 /// Formats printable ASCII characters with a double quote on either end, 92 /// escaping the rest. 93 /// 94 /// ``` 95 /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; 96 /// // Embedded double quotes are escaped. 97 /// let ascii = b_str!("Hello, \"BStr\"!"); 98 /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; 99 /// assert_eq!(s.to_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); 100 /// 101 /// let non_ascii = b_str!(""); 102 /// let s = CString::try_from_fmt(fmt!("{non_ascii:?}"))?; 103 /// assert_eq!(s.to_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); 104 /// # Ok::<(), kernel::error::Error>(()) 105 /// ``` 106 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 107 f.write_char('"')?; 108 for &b in &self.0 { 109 match b { 110 // Common escape codes. 111 b'\t' => f.write_str("\\t")?, 112 b'\n' => f.write_str("\\n")?, 113 b'\r' => f.write_str("\\r")?, 114 // String escape characters. 115 b'\"' => f.write_str("\\\"")?, 116 b'\\' => f.write_str("\\\\")?, 117 // Printable characters. 
118 0x20..=0x7e => f.write_char(b as char)?, 119 _ => write!(f, "\\x{b:02x}")?, 120 } 121 } 122 f.write_char('"') 123 } 124 } 125 126 impl Deref for BStr { 127 type Target = [u8]; 128 129 #[inline] 130 fn deref(&self) -> &Self::Target { 131 &self.0 132 } 133 } 134 135 impl PartialEq for BStr { 136 fn eq(&self, other: &Self) -> bool { 137 self.deref().eq(other.deref()) 138 } 139 } 140 141 impl<Idx> Index<Idx> for BStr 142 where 143 [u8]: Index<Idx, Output = [u8]>, 144 { 145 type Output = Self; 146 147 fn index(&self, index: Idx) -> &Self::Output { 148 BStr::from_bytes(&self.0[index]) 149 } 150 } 151 152 impl AsRef<BStr> for [u8] { 153 fn as_ref(&self) -> &BStr { 154 BStr::from_bytes(self) 155 } 156 } 157 158 impl AsRef<BStr> for BStr { 159 fn as_ref(&self) -> &BStr { 160 self 161 } 162 } 163 164 /// Creates a new [`BStr`] from a string literal. 165 /// 166 /// `b_str!` converts the supplied string literal to byte string, so non-ASCII 167 /// characters can be included. 168 /// 169 /// # Examples 170 /// 171 /// ``` 172 /// # use kernel::b_str; 173 /// # use kernel::str::BStr; 174 /// const MY_BSTR: &BStr = b_str!("My awesome BStr!"); 175 /// ``` 176 #[macro_export] 177 macro_rules! b_str { 178 ($str:literal) => {{ 179 const S: &'static str = $str; 180 const C: &'static $crate::str::BStr = $crate::str::BStr::from_bytes(S.as_bytes()); 181 C 182 }}; 183 } 184 185 /// Returns a C pointer to the string. 186 // It is a free function rather than a method on an extension trait because: 187 // 188 // - error[E0379]: functions in trait impls cannot be declared const 189 #[inline] 190 pub const fn as_char_ptr_in_const_context(c_str: &CStr) -> *const c_char { 191 c_str.0.as_ptr() 192 } 193 194 /// Possible errors when using conversion functions in [`CStr`]. 195 #[derive(Debug, Clone, Copy)] 196 pub enum CStrConvertError { 197 /// Supplied bytes contain an interior `NUL`. 198 InteriorNul, 199 200 /// Supplied bytes are not terminated by `NUL`. 
201 NotNulTerminated, 202 } 203 204 impl From<CStrConvertError> for Error { 205 #[inline] 206 fn from(_: CStrConvertError) -> Error { 207 EINVAL 208 } 209 } 210 211 /// A string that is guaranteed to have exactly one `NUL` byte, which is at the 212 /// end. 213 /// 214 /// Used for interoperability with kernel APIs that take C strings. 215 #[repr(transparent)] 216 pub struct CStr([u8]); 217 218 impl CStr { 219 /// Returns the length of this string excluding `NUL`. 220 #[inline] 221 pub const fn len(&self) -> usize { 222 self.len_with_nul() - 1 223 } 224 225 /// Returns the length of this string with `NUL`. 226 #[inline] 227 pub const fn len_with_nul(&self) -> usize { 228 if self.0.is_empty() { 229 // SAFETY: This is one of the invariant of `CStr`. 230 // We add a `unreachable_unchecked` here to hint the optimizer that 231 // the value returned from this function is non-zero. 232 unsafe { core::hint::unreachable_unchecked() }; 233 } 234 self.0.len() 235 } 236 237 /// Returns `true` if the string only includes `NUL`. 238 #[inline] 239 pub const fn is_empty(&self) -> bool { 240 self.len() == 0 241 } 242 243 /// Wraps a raw C string pointer. 244 /// 245 /// # Safety 246 /// 247 /// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must 248 /// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr` 249 /// must not be mutated. 250 #[inline] 251 pub unsafe fn from_char_ptr<'a>(ptr: *const c_char) -> &'a Self { 252 // SAFETY: The safety precondition guarantees `ptr` is a valid pointer 253 // to a `NUL`-terminated C string. 254 let len = unsafe { bindings::strlen(ptr) } + 1; 255 // SAFETY: Lifetime guaranteed by the safety precondition. 256 let bytes = unsafe { core::slice::from_raw_parts(ptr.cast(), len) }; 257 // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`. 258 // As we have added 1 to `len`, the last byte is known to be `NUL`. 
259 unsafe { Self::from_bytes_with_nul_unchecked(bytes) } 260 } 261 262 /// Creates a [`CStr`] from a `[u8]`. 263 /// 264 /// The provided slice must be `NUL`-terminated, does not contain any 265 /// interior `NUL` bytes. 266 pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError> { 267 if bytes.is_empty() { 268 return Err(CStrConvertError::NotNulTerminated); 269 } 270 if bytes[bytes.len() - 1] != 0 { 271 return Err(CStrConvertError::NotNulTerminated); 272 } 273 let mut i = 0; 274 // `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking, 275 // while it couldn't optimize away bounds checks for `i < bytes.len() - 1`. 276 while i + 1 < bytes.len() { 277 if bytes[i] == 0 { 278 return Err(CStrConvertError::InteriorNul); 279 } 280 i += 1; 281 } 282 // SAFETY: We just checked that all properties hold. 283 Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) }) 284 } 285 286 /// Creates a [`CStr`] from a `[u8]` without performing any additional 287 /// checks. 288 /// 289 /// # Safety 290 /// 291 /// `bytes` *must* end with a `NUL` byte, and should only have a single 292 /// `NUL` byte (or the string will be truncated). 293 #[inline] 294 pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr { 295 // SAFETY: Properties of `bytes` guaranteed by the safety precondition. 296 unsafe { core::mem::transmute(bytes) } 297 } 298 299 /// Creates a mutable [`CStr`] from a `[u8]` without performing any 300 /// additional checks. 301 /// 302 /// # Safety 303 /// 304 /// `bytes` *must* end with a `NUL` byte, and should only have a single 305 /// `NUL` byte (or the string will be truncated). 306 #[inline] 307 pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { 308 // SAFETY: Properties of `bytes` guaranteed by the safety precondition. 309 unsafe { &mut *(core::ptr::from_mut(bytes) as *mut CStr) } 310 } 311 312 /// Returns a C pointer to the string. 
313 /// 314 /// Using this function in a const context is deprecated in favor of 315 /// [`as_char_ptr_in_const_context`] in preparation for replacing `CStr` with `core::ffi::CStr` 316 /// which does not have this method. 317 #[inline] 318 pub const fn as_char_ptr(&self) -> *const c_char { 319 as_char_ptr_in_const_context(self) 320 } 321 322 /// Convert the string to a byte slice without the trailing `NUL` byte. 323 #[inline] 324 pub fn to_bytes(&self) -> &[u8] { 325 &self.0[..self.len()] 326 } 327 328 /// Convert the string to a byte slice without the trailing `NUL` byte. 329 /// 330 /// This function is deprecated in favor of [`Self::to_bytes`] in preparation for replacing 331 /// `CStr` with `core::ffi::CStr` which does not have this method. 332 #[inline] 333 pub fn as_bytes(&self) -> &[u8] { 334 self.to_bytes() 335 } 336 337 /// Convert the string to a byte slice containing the trailing `NUL` byte. 338 #[inline] 339 pub const fn to_bytes_with_nul(&self) -> &[u8] { 340 &self.0 341 } 342 343 /// Convert the string to a byte slice containing the trailing `NUL` byte. 344 /// 345 /// This function is deprecated in favor of [`Self::to_bytes_with_nul`] in preparation for 346 /// replacing `CStr` with `core::ffi::CStr` which does not have this method. 347 #[inline] 348 pub const fn as_bytes_with_nul(&self) -> &[u8] { 349 self.to_bytes_with_nul() 350 } 351 352 /// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8. 353 /// 354 /// If the contents of the [`CStr`] are valid UTF-8 data, this 355 /// function will return the corresponding [`&str`] slice. Otherwise, 356 /// it will return an error with details of where UTF-8 validation failed. 
357 /// 358 /// # Examples 359 /// 360 /// ``` 361 /// # use kernel::str::CStr; 362 /// let cstr = CStr::from_bytes_with_nul(b"foo\0")?; 363 /// assert_eq!(cstr.to_str(), Ok("foo")); 364 /// # Ok::<(), kernel::error::Error>(()) 365 /// ``` 366 #[inline] 367 pub fn to_str(&self) -> Result<&str, core::str::Utf8Error> { 368 core::str::from_utf8(self.as_bytes()) 369 } 370 371 /// Unsafely convert this [`CStr`] into a [`&str`], without checking for 372 /// valid UTF-8. 373 /// 374 /// # Safety 375 /// 376 /// The contents must be valid UTF-8. 377 /// 378 /// # Examples 379 /// 380 /// ``` 381 /// # use kernel::c_str; 382 /// # use kernel::str::CStr; 383 /// let bar = c_str!("ツ"); 384 /// // SAFETY: String literals are guaranteed to be valid UTF-8 385 /// // by the Rust compiler. 386 /// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ"); 387 /// ``` 388 #[inline] 389 pub unsafe fn as_str_unchecked(&self) -> &str { 390 // SAFETY: TODO. 391 unsafe { core::str::from_utf8_unchecked(self.as_bytes()) } 392 } 393 394 /// Convert this [`CStr`] into a [`CString`] by allocating memory and 395 /// copying over the string data. 396 pub fn to_cstring(&self) -> Result<CString, AllocError> { 397 CString::try_from(self) 398 } 399 400 /// Converts this [`CStr`] to its ASCII lower case equivalent in-place. 401 /// 402 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', 403 /// but non-ASCII letters are unchanged. 404 /// 405 /// To return a new lowercased value without modifying the existing one, use 406 /// [`to_ascii_lowercase()`]. 407 /// 408 /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase 409 pub fn make_ascii_lowercase(&mut self) { 410 // INVARIANT: This doesn't introduce or remove NUL bytes in the C 411 // string. 412 self.0.make_ascii_lowercase(); 413 } 414 415 /// Converts this [`CStr`] to its ASCII upper case equivalent in-place. 416 /// 417 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', 418 /// but non-ASCII letters are unchanged. 
419 /// 420 /// To return a new uppercased value without modifying the existing one, use 421 /// [`to_ascii_uppercase()`]. 422 /// 423 /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase 424 pub fn make_ascii_uppercase(&mut self) { 425 // INVARIANT: This doesn't introduce or remove NUL bytes in the C 426 // string. 427 self.0.make_ascii_uppercase(); 428 } 429 430 /// Returns a copy of this [`CString`] where each character is mapped to its 431 /// ASCII lower case equivalent. 432 /// 433 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', 434 /// but non-ASCII letters are unchanged. 435 /// 436 /// To lowercase the value in-place, use [`make_ascii_lowercase`]. 437 /// 438 /// [`make_ascii_lowercase`]: str::make_ascii_lowercase 439 pub fn to_ascii_lowercase(&self) -> Result<CString, AllocError> { 440 let mut s = self.to_cstring()?; 441 442 s.make_ascii_lowercase(); 443 444 Ok(s) 445 } 446 447 /// Returns a copy of this [`CString`] where each character is mapped to its 448 /// ASCII upper case equivalent. 449 /// 450 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', 451 /// but non-ASCII letters are unchanged. 452 /// 453 /// To uppercase the value in-place, use [`make_ascii_uppercase`]. 454 /// 455 /// [`make_ascii_uppercase`]: str::make_ascii_uppercase 456 pub fn to_ascii_uppercase(&self) -> Result<CString, AllocError> { 457 let mut s = self.to_cstring()?; 458 459 s.make_ascii_uppercase(); 460 461 Ok(s) 462 } 463 } 464 465 impl fmt::Display for CStr { 466 /// Formats printable ASCII characters, escaping the rest. 
467 /// 468 /// ``` 469 /// # use kernel::c_str; 470 /// # use kernel::prelude::fmt; 471 /// # use kernel::str::CStr; 472 /// # use kernel::str::CString; 473 /// let penguin = c_str!(""); 474 /// let s = CString::try_from_fmt(fmt!("{penguin}"))?; 475 /// assert_eq!(s.to_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); 476 /// 477 /// let ascii = c_str!("so \"cool\""); 478 /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; 479 /// assert_eq!(s.to_bytes_with_nul(), "so \"cool\"\0".as_bytes()); 480 /// # Ok::<(), kernel::error::Error>(()) 481 /// ``` 482 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 483 for &c in self.to_bytes() { 484 if (0x20..0x7f).contains(&c) { 485 // Printable character. 486 f.write_char(c as char)?; 487 } else { 488 write!(f, "\\x{c:02x}")?; 489 } 490 } 491 Ok(()) 492 } 493 } 494 495 impl fmt::Debug for CStr { 496 /// Formats printable ASCII characters with a double quote on either end, escaping the rest. 497 /// 498 /// ``` 499 /// # use kernel::c_str; 500 /// # use kernel::prelude::fmt; 501 /// # use kernel::str::CStr; 502 /// # use kernel::str::CString; 503 /// let penguin = c_str!(""); 504 /// let s = CString::try_from_fmt(fmt!("{penguin:?}"))?; 505 /// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes()); 506 /// 507 /// // Embedded double quotes are escaped. 508 /// let ascii = c_str!("so \"cool\""); 509 /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; 510 /// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes()); 511 /// # Ok::<(), kernel::error::Error>(()) 512 /// ``` 513 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 514 f.write_str("\"")?; 515 for &c in self.as_bytes() { 516 match c { 517 // Printable characters. 
518 b'\"' => f.write_str("\\\"")?, 519 0x20..=0x7e => f.write_char(c as char)?, 520 _ => write!(f, "\\x{c:02x}")?, 521 } 522 } 523 f.write_str("\"") 524 } 525 } 526 527 impl AsRef<BStr> for CStr { 528 #[inline] 529 fn as_ref(&self) -> &BStr { 530 BStr::from_bytes(self.as_bytes()) 531 } 532 } 533 534 impl Deref for CStr { 535 type Target = BStr; 536 537 #[inline] 538 fn deref(&self) -> &Self::Target { 539 self.as_ref() 540 } 541 } 542 543 impl Index<ops::RangeFrom<usize>> for CStr { 544 type Output = CStr; 545 546 #[inline] 547 fn index(&self, index: ops::RangeFrom<usize>) -> &Self::Output { 548 // Delegate bounds checking to slice. 549 // Assign to _ to mute clippy's unnecessary operation warning. 550 let _ = &self.as_bytes()[index.start..]; 551 // SAFETY: We just checked the bounds. 552 unsafe { Self::from_bytes_with_nul_unchecked(&self.0[index.start..]) } 553 } 554 } 555 556 impl Index<ops::RangeFull> for CStr { 557 type Output = CStr; 558 559 #[inline] 560 fn index(&self, _index: ops::RangeFull) -> &Self::Output { 561 self 562 } 563 } 564 565 mod private { 566 use core::ops; 567 568 // Marker trait for index types that can be forward to `BStr`. 569 pub trait CStrIndex {} 570 571 impl CStrIndex for usize {} 572 impl CStrIndex for ops::Range<usize> {} 573 impl CStrIndex for ops::RangeInclusive<usize> {} 574 impl CStrIndex for ops::RangeToInclusive<usize> {} 575 } 576 577 impl<Idx> Index<Idx> for CStr 578 where 579 Idx: private::CStrIndex, 580 BStr: Index<Idx>, 581 { 582 type Output = <BStr as Index<Idx>>::Output; 583 584 #[inline] 585 fn index(&self, index: Idx) -> &Self::Output { 586 &self.as_ref()[index] 587 } 588 } 589 590 /// Creates a new [`CStr`] from a string literal. 591 /// 592 /// The string literal should not contain any `NUL` bytes. 593 /// 594 /// # Examples 595 /// 596 /// ``` 597 /// # use kernel::c_str; 598 /// # use kernel::str::CStr; 599 /// const MY_CSTR: &CStr = c_str!("My awesome CStr!"); 600 /// ``` 601 #[macro_export] 602 macro_rules! 
c_str { 603 ($str:expr) => {{ 604 const S: &str = concat!($str, "\0"); 605 const C: &$crate::str::CStr = match $crate::str::CStr::from_bytes_with_nul(S.as_bytes()) { 606 Ok(v) => v, 607 Err(_) => panic!("string contains interior NUL"), 608 }; 609 C 610 }}; 611 } 612 613 #[kunit_tests(rust_kernel_str)] 614 mod tests { 615 use super::*; 616 617 macro_rules! format { 618 ($($f:tt)*) => ({ 619 CString::try_from_fmt(fmt!($($f)*))?.to_str()? 620 }) 621 } 622 623 const ALL_ASCII_CHARS: &str = 624 "\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\ 625 \\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f \ 626 !\"#$%&'()*+,-./0123456789:;<=>?@\ 627 ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\ 628 \\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\ 629 \\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\ 630 \\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\ 631 \\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\ 632 \\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\ 633 \\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\ 634 \\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\ 635 \\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff"; 636 637 #[test] 638 fn test_cstr_to_str() -> Result { 639 let good_bytes = b"\xf0\x9f\xa6\x80\0"; 640 let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?; 641 let checked_str = checked_cstr.to_str()?; 642 assert_eq!(checked_str, ""); 643 Ok(()) 644 } 645 646 #[test] 647 fn test_cstr_to_str_invalid_utf8() -> Result { 648 let bad_bytes = b"\xc3\x28\0"; 649 let checked_cstr = CStr::from_bytes_with_nul(bad_bytes)?; 650 assert!(checked_cstr.to_str().is_err()); 651 Ok(()) 652 } 653 654 #[test] 655 fn test_cstr_as_str_unchecked() -> 
Result { 656 let good_bytes = b"\xf0\x9f\x90\xA7\0"; 657 let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?; 658 // SAFETY: The contents come from a string literal which contains valid UTF-8. 659 let unchecked_str = unsafe { checked_cstr.as_str_unchecked() }; 660 assert_eq!(unchecked_str, ""); 661 Ok(()) 662 } 663 664 #[test] 665 fn test_cstr_display() -> Result { 666 let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?; 667 assert_eq!(format!("{hello_world}"), "hello, world!"); 668 let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?; 669 assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a"); 670 let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?; 671 assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); 672 let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?; 673 assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); 674 Ok(()) 675 } 676 677 #[test] 678 fn test_cstr_display_all_bytes() -> Result { 679 let mut bytes: [u8; 256] = [0; 256]; 680 // fill `bytes` with [1..=255] + [0] 681 for i in u8::MIN..=u8::MAX { 682 bytes[i as usize] = i.wrapping_add(1); 683 } 684 let cstr = CStr::from_bytes_with_nul(&bytes)?; 685 assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS); 686 Ok(()) 687 } 688 689 #[test] 690 fn test_cstr_debug() -> Result { 691 let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?; 692 assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); 693 let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?; 694 assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\""); 695 let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?; 696 assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); 697 let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?; 698 assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); 699 Ok(()) 700 } 701 702 #[test] 703 fn test_bstr_display() -> Result { 704 let hello_world = BStr::from_bytes(b"hello, 
world!"); 705 assert_eq!(format!("{hello_world}"), "hello, world!"); 706 let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); 707 assert_eq!(format!("{escapes}"), "_\\t_\\n_\\r_\\_'_\"_"); 708 let others = BStr::from_bytes(b"\x01"); 709 assert_eq!(format!("{others}"), "\\x01"); 710 let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); 711 assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); 712 let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); 713 assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); 714 Ok(()) 715 } 716 717 #[test] 718 fn test_bstr_debug() -> Result { 719 let hello_world = BStr::from_bytes(b"hello, world!"); 720 assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); 721 let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); 722 assert_eq!(format!("{escapes:?}"), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); 723 let others = BStr::from_bytes(b"\x01"); 724 assert_eq!(format!("{others:?}"), "\"\\x01\""); 725 let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); 726 assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); 727 let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); 728 assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); 729 Ok(()) 730 } 731 } 732 733 /// Allows formatting of [`fmt::Arguments`] into a raw buffer. 734 /// 735 /// It does not fail if callers write past the end of the buffer so that they can calculate the 736 /// size required to fit everything. 737 /// 738 /// # Invariants 739 /// 740 /// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos` 741 /// is less than `end`. 742 pub struct RawFormatter { 743 // Use `usize` to use `saturating_*` functions. 744 beg: usize, 745 pos: usize, 746 end: usize, 747 } 748 749 impl RawFormatter { 750 /// Creates a new instance of [`RawFormatter`] with an empty buffer. 751 fn new() -> Self { 752 // INVARIANT: The buffer is empty, so the region that needs to be writable is empty. 
753 Self { 754 beg: 0, 755 pos: 0, 756 end: 0, 757 } 758 } 759 760 /// Creates a new instance of [`RawFormatter`] with the given buffer pointers. 761 /// 762 /// # Safety 763 /// 764 /// If `pos` is less than `end`, then the region between `pos` (inclusive) and `end` 765 /// (exclusive) must be valid for writes for the lifetime of the returned [`RawFormatter`]. 766 pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self { 767 // INVARIANT: The safety requirements guarantee the type invariants. 768 Self { 769 beg: pos as usize, 770 pos: pos as usize, 771 end: end as usize, 772 } 773 } 774 775 /// Creates a new instance of [`RawFormatter`] with the given buffer. 776 /// 777 /// # Safety 778 /// 779 /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes 780 /// for the lifetime of the returned [`RawFormatter`]. 781 pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { 782 let pos = buf as usize; 783 // INVARIANT: We ensure that `end` is never less than `buf`, and the safety requirements 784 // guarantees that the memory region is valid for writes. 785 Self { 786 pos, 787 beg: pos, 788 end: pos.saturating_add(len), 789 } 790 } 791 792 /// Returns the current insert position. 793 /// 794 /// N.B. It may point to invalid memory. 795 pub(crate) fn pos(&self) -> *mut u8 { 796 self.pos as *mut u8 797 } 798 799 /// Returns the number of bytes written to the formatter. 800 pub fn bytes_written(&self) -> usize { 801 self.pos - self.beg 802 } 803 } 804 805 impl fmt::Write for RawFormatter { 806 fn write_str(&mut self, s: &str) -> fmt::Result { 807 // `pos` value after writing `len` bytes. This does not have to be bounded by `end`, but we 808 // don't want it to wrap around to 0. 809 let pos_new = self.pos.saturating_add(s.len()); 810 811 // Amount that we can copy. `saturating_sub` ensures we get 0 if `pos` goes past `end`. 
812 let len_to_copy = core::cmp::min(pos_new, self.end).saturating_sub(self.pos); 813 814 if len_to_copy > 0 { 815 // SAFETY: If `len_to_copy` is non-zero, then we know `pos` has not gone past `end` 816 // yet, so it is valid for write per the type invariants. 817 unsafe { 818 core::ptr::copy_nonoverlapping( 819 s.as_bytes().as_ptr(), 820 self.pos as *mut u8, 821 len_to_copy, 822 ) 823 }; 824 } 825 826 self.pos = pos_new; 827 Ok(()) 828 } 829 } 830 831 /// Allows formatting of [`fmt::Arguments`] into a raw buffer. 832 /// 833 /// Fails if callers attempt to write more than will fit in the buffer. 834 pub struct Formatter<'a>(RawFormatter, PhantomData<&'a mut ()>); 835 836 impl Formatter<'_> { 837 /// Creates a new instance of [`Formatter`] with the given buffer. 838 /// 839 /// # Safety 840 /// 841 /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes 842 /// for the lifetime of the returned [`Formatter`]. 843 pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { 844 // SAFETY: The safety requirements of this function satisfy those of the callee. 845 Self(unsafe { RawFormatter::from_buffer(buf, len) }, PhantomData) 846 } 847 848 /// Create a new [`Self`] instance. 849 pub fn new(buffer: &mut [u8]) -> Self { 850 // SAFETY: `buffer` is valid for writes for the entire length for 851 // the lifetime of `Self`. 852 unsafe { Formatter::from_buffer(buffer.as_mut_ptr(), buffer.len()) } 853 } 854 } 855 856 impl Deref for Formatter<'_> { 857 type Target = RawFormatter; 858 859 fn deref(&self) -> &Self::Target { 860 &self.0 861 } 862 } 863 864 impl fmt::Write for Formatter<'_> { 865 fn write_str(&mut self, s: &str) -> fmt::Result { 866 self.0.write_str(s)?; 867 868 // Fail the request if we go past the end of the buffer. 869 if self.0.pos > self.0.end { 870 Err(fmt::Error) 871 } else { 872 Ok(()) 873 } 874 } 875 } 876 877 /// A mutable reference to a byte buffer where a string can be written into. 
878 /// 879 /// The buffer will be automatically null terminated after the last written character. 880 /// 881 /// # Invariants 882 /// 883 /// * The first byte of `buffer` is always zero. 884 /// * The length of `buffer` is at least 1. 885 pub(crate) struct NullTerminatedFormatter<'a> { 886 buffer: &'a mut [u8], 887 } 888 889 impl<'a> NullTerminatedFormatter<'a> { 890 /// Create a new [`Self`] instance. 891 pub(crate) fn new(buffer: &'a mut [u8]) -> Option<NullTerminatedFormatter<'a>> { 892 *(buffer.first_mut()?) = 0; 893 894 // INVARIANT: 895 // - We wrote zero to the first byte above. 896 // - If buffer was not at least length 1, `buffer.first_mut()` would return None. 897 Some(Self { buffer }) 898 } 899 } 900 901 impl Write for NullTerminatedFormatter<'_> { 902 fn write_str(&mut self, s: &str) -> fmt::Result { 903 let bytes = s.as_bytes(); 904 let len = bytes.len(); 905 906 // We want space for a zero. By type invariant, buffer length is always at least 1, so no 907 // underflow. 908 if len > self.buffer.len() - 1 { 909 return Err(fmt::Error); 910 } 911 912 let buffer = core::mem::take(&mut self.buffer); 913 // We break the zero start invariant for a short while. 914 buffer[..len].copy_from_slice(bytes); 915 // INVARIANT: We checked above that buffer will have size at least 1 after this assignment. 916 self.buffer = &mut buffer[len..]; 917 918 // INVARIANT: We write zero to the first byte of the buffer. 919 self.buffer[0] = 0; 920 921 Ok(()) 922 } 923 } 924 925 /// # Safety 926 /// 927 /// - `string` must point to a null terminated string that is valid for read. 928 unsafe fn kstrtobool_raw(string: *const u8) -> Result<bool> { 929 let mut result: bool = false; 930 931 // SAFETY: 932 // - By function safety requirement, `string` is a valid null-terminated string. 933 // - `result` is a valid `bool` that we own. 
to_result(unsafe { bindings::kstrtobool(string, &mut result) })?;
    Ok(result)
}

/// Parses a typical user-supplied boolean string via the kernel's `kstrtobool` function.
///
/// Yields `Ok(bool)` when the first character is one of 'YyTt1NnFf0', or when the input starts
/// with \[oO\]\[NnFf\] (i.e. "on" / "off"). Any other input yields `Err(EINVAL)`.
///
/// # Examples
///
/// ```
/// # use kernel::{c_str, str::kstrtobool};
///
/// // Lowercase
/// assert_eq!(kstrtobool(c_str!("true")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("tr")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("t")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("twrong")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("false")), Ok(false));
/// assert_eq!(kstrtobool(c_str!("f")), Ok(false));
/// assert_eq!(kstrtobool(c_str!("yes")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("no")), Ok(false));
/// assert_eq!(kstrtobool(c_str!("on")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("off")), Ok(false));
///
/// // Camel case
/// assert_eq!(kstrtobool(c_str!("True")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("False")), Ok(false));
/// assert_eq!(kstrtobool(c_str!("Yes")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("No")), Ok(false));
/// assert_eq!(kstrtobool(c_str!("On")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("Off")), Ok(false));
///
/// // All caps
/// assert_eq!(kstrtobool(c_str!("TRUE")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("FALSE")), Ok(false));
/// assert_eq!(kstrtobool(c_str!("YES")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("NO")), Ok(false));
/// assert_eq!(kstrtobool(c_str!("ON")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("OFF")), Ok(false));
///
/// // Numeric
/// assert_eq!(kstrtobool(c_str!("1")), Ok(true));
/// assert_eq!(kstrtobool(c_str!("0")), Ok(false));
///
/// // Invalid input
/// assert_eq!(kstrtobool(c_str!("invalid")), Err(EINVAL));
/// assert_eq!(kstrtobool(c_str!("2")), Err(EINVAL));
/// ```
pub fn kstrtobool(string: &CStr) -> Result<bool> {
    let ptr = string.as_char_ptr();
    // SAFETY:
    // - `ptr` was returned by `CStr::as_char_ptr`, so it is guaranteed to be
    //   null terminated.
    // - `string` is borrowed for the whole call, so the string is valid for read.
    unsafe { kstrtobool_raw(ptr) }
}

/// Converts a byte slice to `bool` by deferring to [`kernel::str::kstrtobool`].
///
/// At most the first two bytes of `bytes` are taken into account.
pub fn kstrtobool_bytes(bytes: &[u8]) -> Result<bool> {
    // `kstrtobool` only considers the first two bytes of the input, so copy just those into a
    // small buffer; bytes that are missing from `bytes` are replaced by `0`.
    let first = bytes.first().copied().unwrap_or(0);
    let second = bytes.get(1).copied().unwrap_or(0);
    let stack_string = [first, second, 0];
    // SAFETY: `stack_string` ends with a `0` byte, so it is null terminated, and it is live on
    // the stack for the duration of the call, so it is valid for read.
    unsafe { kstrtobool_raw(stack_string.as_ptr()) }
}

/// An owned string that is guaranteed to have exactly one `NUL` byte, which is at the end.
///
/// Used for interoperability with kernel APIs that take C strings.
///
/// # Invariants
///
/// The string is always `NUL`-terminated and contains no other `NUL` bytes.
///
/// # Examples
///
/// ```
/// use kernel::{str::CString, prelude::fmt};
///
/// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?;
/// assert_eq!(s.to_bytes_with_nul(), "abc1020\0".as_bytes());
///
/// let tmp = "testing";
/// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123))?;
/// assert_eq!(s.to_bytes_with_nul(), "testing123\0".as_bytes());
///
/// // This fails because it has an embedded `NUL` byte.
1024 /// let s = CString::try_from_fmt(fmt!("a\0b{}", 123)); 1025 /// assert_eq!(s.is_ok(), false); 1026 /// # Ok::<(), kernel::error::Error>(()) 1027 /// ``` 1028 pub struct CString { 1029 buf: KVec<u8>, 1030 } 1031 1032 impl CString { 1033 /// Creates an instance of [`CString`] from the given formatted arguments. 1034 pub fn try_from_fmt(args: fmt::Arguments<'_>) -> Result<Self, Error> { 1035 // Calculate the size needed (formatted string plus `NUL` terminator). 1036 let mut f = RawFormatter::new(); 1037 f.write_fmt(args)?; 1038 f.write_str("\0")?; 1039 let size = f.bytes_written(); 1040 1041 // Allocate a vector with the required number of bytes, and write to it. 1042 let mut buf = KVec::with_capacity(size, GFP_KERNEL)?; 1043 // SAFETY: The buffer stored in `buf` is at least of size `size` and is valid for writes. 1044 let mut f = unsafe { Formatter::from_buffer(buf.as_mut_ptr(), size) }; 1045 f.write_fmt(args)?; 1046 f.write_str("\0")?; 1047 1048 // SAFETY: The number of bytes that can be written to `f` is bounded by `size`, which is 1049 // `buf`'s capacity. The contents of the buffer have been initialised by writes to `f`. 1050 unsafe { buf.inc_len(f.bytes_written()) }; 1051 1052 // Check that there are no `NUL` bytes before the end. 1053 // SAFETY: The buffer is valid for read because `f.bytes_written()` is bounded by `size` 1054 // (which the minimum buffer size) and is non-zero (we wrote at least the `NUL` terminator) 1055 // so `f.bytes_written() - 1` doesn't underflow. 1056 let ptr = unsafe { bindings::memchr(buf.as_ptr().cast(), 0, f.bytes_written() - 1) }; 1057 if !ptr.is_null() { 1058 return Err(EINVAL); 1059 } 1060 1061 // INVARIANT: We wrote the `NUL` terminator and checked above that no other `NUL` bytes 1062 // exist in the buffer. 
1063 Ok(Self { buf }) 1064 } 1065 } 1066 1067 impl Deref for CString { 1068 type Target = CStr; 1069 1070 fn deref(&self) -> &Self::Target { 1071 // SAFETY: The type invariants guarantee that the string is `NUL`-terminated and that no 1072 // other `NUL` bytes exist. 1073 unsafe { CStr::from_bytes_with_nul_unchecked(self.buf.as_slice()) } 1074 } 1075 } 1076 1077 impl DerefMut for CString { 1078 fn deref_mut(&mut self) -> &mut Self::Target { 1079 // SAFETY: A `CString` is always NUL-terminated and contains no other 1080 // NUL bytes. 1081 unsafe { CStr::from_bytes_with_nul_unchecked_mut(self.buf.as_mut_slice()) } 1082 } 1083 } 1084 1085 impl<'a> TryFrom<&'a CStr> for CString { 1086 type Error = AllocError; 1087 1088 fn try_from(cstr: &'a CStr) -> Result<CString, AllocError> { 1089 let mut buf = KVec::new(); 1090 1091 buf.extend_from_slice(cstr.to_bytes_with_nul(), GFP_KERNEL)?; 1092 1093 // INVARIANT: The `CStr` and `CString` types have the same invariants for 1094 // the string data, and we copied it over without changes. 1095 Ok(CString { buf }) 1096 } 1097 } 1098 1099 impl fmt::Debug for CString { 1100 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 1101 fmt::Debug::fmt(&**self, f) 1102 } 1103 } 1104