1 // SPDX-License-Identifier: GPL-2.0 2 3 //! String representations. 4 5 use crate::alloc::{flags::*, AllocError, KVec}; 6 use crate::fmt::{self, Write}; 7 use core::ops::{self, Deref, DerefMut, Index}; 8 9 use crate::prelude::*; 10 11 /// Byte string without UTF-8 validity guarantee. 12 #[repr(transparent)] 13 pub struct BStr([u8]); 14 15 impl BStr { 16 /// Returns the length of this string. 17 #[inline] 18 pub const fn len(&self) -> usize { 19 self.0.len() 20 } 21 22 /// Returns `true` if the string is empty. 23 #[inline] 24 pub const fn is_empty(&self) -> bool { 25 self.len() == 0 26 } 27 28 /// Creates a [`BStr`] from a `[u8]`. 29 #[inline] 30 pub const fn from_bytes(bytes: &[u8]) -> &Self { 31 // SAFETY: `BStr` is transparent to `[u8]`. 32 unsafe { &*(core::ptr::from_ref(bytes) as *const BStr) } 33 } 34 35 /// Strip a prefix from `self`. Delegates to [`slice::strip_prefix`]. 36 /// 37 /// # Examples 38 /// 39 /// ``` 40 /// # use kernel::b_str; 41 /// assert_eq!(Some(b_str!("bar")), b_str!("foobar").strip_prefix(b_str!("foo"))); 42 /// assert_eq!(None, b_str!("foobar").strip_prefix(b_str!("bar"))); 43 /// assert_eq!(Some(b_str!("foobar")), b_str!("foobar").strip_prefix(b_str!(""))); 44 /// assert_eq!(Some(b_str!("")), b_str!("foobar").strip_prefix(b_str!("foobar"))); 45 /// ``` 46 pub fn strip_prefix(&self, pattern: impl AsRef<Self>) -> Option<&BStr> { 47 self.deref() 48 .strip_prefix(pattern.as_ref().deref()) 49 .map(Self::from_bytes) 50 } 51 } 52 53 impl fmt::Display for BStr { 54 /// Formats printable ASCII characters, escaping the rest. 55 /// 56 /// ``` 57 /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; 58 /// let ascii = b_str!("Hello, BStr!"); 59 /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; 60 /// assert_eq!(s.to_bytes(), "Hello, BStr!".as_bytes()); 61 /// 62 /// let non_ascii = b_str!(""); 63 /// let s = CString::try_from_fmt(fmt!("{non_ascii}"))?; 64 /// assert_eq!(s.to_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); 65 /// # Ok::<(), kernel::error::Error>(()) 66 /// ``` 67 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 68 for &b in &self.0 { 69 match b { 70 // Common escape codes. 71 b'\t' => f.write_str("\\t")?, 72 b'\n' => f.write_str("\\n")?, 73 b'\r' => f.write_str("\\r")?, 74 // Printable characters. 75 0x20..=0x7e => f.write_char(b as char)?, 76 _ => write!(f, "\\x{b:02x}")?, 77 } 78 } 79 Ok(()) 80 } 81 } 82 83 impl fmt::Debug for BStr { 84 /// Formats printable ASCII characters with a double quote on either end, 85 /// escaping the rest. 86 /// 87 /// ``` 88 /// # use kernel::{prelude::fmt, b_str, str::{BStr, CString}}; 89 /// // Embedded double quotes are escaped. 90 /// let ascii = b_str!("Hello, \"BStr\"!"); 91 /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; 92 /// assert_eq!(s.to_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); 93 /// 94 /// let non_ascii = b_str!(""); 95 /// let s = CString::try_from_fmt(fmt!("{non_ascii:?}"))?; 96 /// assert_eq!(s.to_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); 97 /// # Ok::<(), kernel::error::Error>(()) 98 /// ``` 99 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 100 f.write_char('"')?; 101 for &b in &self.0 { 102 match b { 103 // Common escape codes. 104 b'\t' => f.write_str("\\t")?, 105 b'\n' => f.write_str("\\n")?, 106 b'\r' => f.write_str("\\r")?, 107 // String escape characters. 108 b'\"' => f.write_str("\\\"")?, 109 b'\\' => f.write_str("\\\\")?, 110 // Printable characters. 111 0x20..=0x7e => f.write_char(b as char)?, 112 _ => write!(f, "\\x{b:02x}")?, 113 } 114 } 115 f.write_char('"') 116 } 117 } 118 119 impl Deref for BStr { 120 type Target = [u8]; 121 122 #[inline] 123 fn deref(&self) -> &Self::Target { 124 &self.0 125 } 126 } 127 128 impl PartialEq for BStr { 129 fn eq(&self, other: &Self) -> bool { 130 self.deref().eq(other.deref()) 131 } 132 } 133 134 impl<Idx> Index<Idx> for BStr 135 where 136 [u8]: Index<Idx, Output = [u8]>, 137 { 138 type Output = Self; 139 140 fn index(&self, index: Idx) -> &Self::Output { 141 BStr::from_bytes(&self.0[index]) 142 } 143 } 144 145 impl AsRef<BStr> for [u8] { 146 fn as_ref(&self) -> &BStr { 147 BStr::from_bytes(self) 148 } 149 } 150 151 impl AsRef<BStr> for BStr { 152 fn as_ref(&self) -> &BStr { 153 self 154 } 155 } 156 157 /// Creates a new [`BStr`] from a string literal. 158 /// 159 /// `b_str!` converts the supplied string literal to byte string, so non-ASCII 160 /// characters can be included. 161 /// 162 /// # Examples 163 /// 164 /// ``` 165 /// # use kernel::b_str; 166 /// # use kernel::str::BStr; 167 /// const MY_BSTR: &BStr = b_str!("My awesome BStr!"); 168 /// ``` 169 #[macro_export] 170 macro_rules! b_str { 171 ($str:literal) => {{ 172 const S: &'static str = $str; 173 const C: &'static $crate::str::BStr = $crate::str::BStr::from_bytes(S.as_bytes()); 174 C 175 }}; 176 } 177 178 /// Returns a C pointer to the string. 179 // It is a free function rather than a method on an extension trait because: 180 // 181 // - error[E0379]: functions in trait impls cannot be declared const 182 #[inline] 183 pub const fn as_char_ptr_in_const_context(c_str: &CStr) -> *const c_char { 184 c_str.0.as_ptr() 185 } 186 187 /// Possible errors when using conversion functions in [`CStr`]. 188 #[derive(Debug, Clone, Copy)] 189 pub enum CStrConvertError { 190 /// Supplied bytes contain an interior `NUL`. 191 InteriorNul, 192 193 /// Supplied bytes are not terminated by `NUL`. 194 NotNulTerminated, 195 } 196 197 impl From<CStrConvertError> for Error { 198 #[inline] 199 fn from(_: CStrConvertError) -> Error { 200 EINVAL 201 } 202 } 203 204 /// A string that is guaranteed to have exactly one `NUL` byte, which is at the 205 /// end. 206 /// 207 /// Used for interoperability with kernel APIs that take C strings. 208 #[repr(transparent)] 209 pub struct CStr([u8]); 210 211 impl CStr { 212 /// Returns the length of this string excluding `NUL`. 213 #[inline] 214 pub const fn len(&self) -> usize { 215 self.len_with_nul() - 1 216 } 217 218 /// Returns the length of this string with `NUL`. 219 #[inline] 220 pub const fn len_with_nul(&self) -> usize { 221 if self.0.is_empty() { 222 // SAFETY: This is one of the invariant of `CStr`. 223 // We add a `unreachable_unchecked` here to hint the optimizer that 224 // the value returned from this function is non-zero. 225 unsafe { core::hint::unreachable_unchecked() }; 226 } 227 self.0.len() 228 } 229 230 /// Returns `true` if the string only includes `NUL`. 231 #[inline] 232 pub const fn is_empty(&self) -> bool { 233 self.len() == 0 234 } 235 236 /// Wraps a raw C string pointer. 237 /// 238 /// # Safety 239 /// 240 /// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must 241 /// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr` 242 /// must not be mutated. 243 #[inline] 244 pub unsafe fn from_char_ptr<'a>(ptr: *const c_char) -> &'a Self { 245 // SAFETY: The safety precondition guarantees `ptr` is a valid pointer 246 // to a `NUL`-terminated C string. 247 let len = unsafe { bindings::strlen(ptr) } + 1; 248 // SAFETY: Lifetime guaranteed by the safety precondition. 249 let bytes = unsafe { core::slice::from_raw_parts(ptr.cast(), len) }; 250 // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`. 251 // As we have added 1 to `len`, the last byte is known to be `NUL`. 252 unsafe { Self::from_bytes_with_nul_unchecked(bytes) } 253 } 254 255 /// Creates a [`CStr`] from a `[u8]`. 256 /// 257 /// The provided slice must be `NUL`-terminated, does not contain any 258 /// interior `NUL` bytes. 259 pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError> { 260 if bytes.is_empty() { 261 return Err(CStrConvertError::NotNulTerminated); 262 } 263 if bytes[bytes.len() - 1] != 0 { 264 return Err(CStrConvertError::NotNulTerminated); 265 } 266 let mut i = 0; 267 // `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking, 268 // while it couldn't optimize away bounds checks for `i < bytes.len() - 1`. 269 while i + 1 < bytes.len() { 270 if bytes[i] == 0 { 271 return Err(CStrConvertError::InteriorNul); 272 } 273 i += 1; 274 } 275 // SAFETY: We just checked that all properties hold. 276 Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) }) 277 } 278 279 /// Creates a [`CStr`] from a `[u8]` without performing any additional 280 /// checks. 281 /// 282 /// # Safety 283 /// 284 /// `bytes` *must* end with a `NUL` byte, and should only have a single 285 /// `NUL` byte (or the string will be truncated). 286 #[inline] 287 pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr { 288 // SAFETY: Properties of `bytes` guaranteed by the safety precondition. 289 unsafe { core::mem::transmute(bytes) } 290 } 291 292 /// Creates a mutable [`CStr`] from a `[u8]` without performing any 293 /// additional checks. 294 /// 295 /// # Safety 296 /// 297 /// `bytes` *must* end with a `NUL` byte, and should only have a single 298 /// `NUL` byte (or the string will be truncated). 299 #[inline] 300 pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { 301 // SAFETY: Properties of `bytes` guaranteed by the safety precondition. 302 unsafe { &mut *(core::ptr::from_mut(bytes) as *mut CStr) } 303 } 304 305 /// Returns a C pointer to the string. 306 /// 307 /// Using this function in a const context is deprecated in favor of 308 /// [`as_char_ptr_in_const_context`] in preparation for replacing `CStr` with `core::ffi::CStr` 309 /// which does not have this method. 310 #[inline] 311 pub const fn as_char_ptr(&self) -> *const c_char { 312 as_char_ptr_in_const_context(self) 313 } 314 315 /// Convert the string to a byte slice without the trailing `NUL` byte. 316 #[inline] 317 pub fn to_bytes(&self) -> &[u8] { 318 &self.0[..self.len()] 319 } 320 321 /// Convert the string to a byte slice without the trailing `NUL` byte. 322 /// 323 /// This function is deprecated in favor of [`Self::to_bytes`] in preparation for replacing 324 /// `CStr` with `core::ffi::CStr` which does not have this method. 325 #[inline] 326 pub fn as_bytes(&self) -> &[u8] { 327 self.to_bytes() 328 } 329 330 /// Convert the string to a byte slice containing the trailing `NUL` byte. 331 #[inline] 332 pub const fn to_bytes_with_nul(&self) -> &[u8] { 333 &self.0 334 } 335 336 /// Convert the string to a byte slice containing the trailing `NUL` byte. 337 /// 338 /// This function is deprecated in favor of [`Self::to_bytes_with_nul`] in preparation for 339 /// replacing `CStr` with `core::ffi::CStr` which does not have this method. 340 #[inline] 341 pub const fn as_bytes_with_nul(&self) -> &[u8] { 342 self.to_bytes_with_nul() 343 } 344 345 /// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8. 346 /// 347 /// If the contents of the [`CStr`] are valid UTF-8 data, this 348 /// function will return the corresponding [`&str`] slice. Otherwise, 349 /// it will return an error with details of where UTF-8 validation failed. 350 /// 351 /// # Examples 352 /// 353 /// ``` 354 /// # use kernel::str::CStr; 355 /// let cstr = CStr::from_bytes_with_nul(b"foo\0")?; 356 /// assert_eq!(cstr.to_str(), Ok("foo")); 357 /// # Ok::<(), kernel::error::Error>(()) 358 /// ``` 359 #[inline] 360 pub fn to_str(&self) -> Result<&str, core::str::Utf8Error> { 361 core::str::from_utf8(self.as_bytes()) 362 } 363 364 /// Unsafely convert this [`CStr`] into a [`&str`], without checking for 365 /// valid UTF-8. 366 /// 367 /// # Safety 368 /// 369 /// The contents must be valid UTF-8. 370 /// 371 /// # Examples 372 /// 373 /// ``` 374 /// # use kernel::c_str; 375 /// # use kernel::str::CStr; 376 /// let bar = c_str!("ツ"); 377 /// // SAFETY: String literals are guaranteed to be valid UTF-8 378 /// // by the Rust compiler. 379 /// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ"); 380 /// ``` 381 #[inline] 382 pub unsafe fn as_str_unchecked(&self) -> &str { 383 // SAFETY: TODO. 384 unsafe { core::str::from_utf8_unchecked(self.as_bytes()) } 385 } 386 387 /// Convert this [`CStr`] into a [`CString`] by allocating memory and 388 /// copying over the string data. 389 pub fn to_cstring(&self) -> Result<CString, AllocError> { 390 CString::try_from(self) 391 } 392 393 /// Converts this [`CStr`] to its ASCII lower case equivalent in-place. 394 /// 395 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', 396 /// but non-ASCII letters are unchanged. 397 /// 398 /// To return a new lowercased value without modifying the existing one, use 399 /// [`to_ascii_lowercase()`]. 400 /// 401 /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase 402 pub fn make_ascii_lowercase(&mut self) { 403 // INVARIANT: This doesn't introduce or remove NUL bytes in the C 404 // string. 405 self.0.make_ascii_lowercase(); 406 } 407 408 /// Converts this [`CStr`] to its ASCII upper case equivalent in-place. 409 /// 410 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', 411 /// but non-ASCII letters are unchanged. 412 /// 413 /// To return a new uppercased value without modifying the existing one, use 414 /// [`to_ascii_uppercase()`]. 415 /// 416 /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase 417 pub fn make_ascii_uppercase(&mut self) { 418 // INVARIANT: This doesn't introduce or remove NUL bytes in the C 419 // string. 420 self.0.make_ascii_uppercase(); 421 } 422 423 /// Returns a copy of this [`CString`] where each character is mapped to its 424 /// ASCII lower case equivalent. 425 /// 426 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', 427 /// but non-ASCII letters are unchanged. 428 /// 429 /// To lowercase the value in-place, use [`make_ascii_lowercase`]. 430 /// 431 /// [`make_ascii_lowercase`]: str::make_ascii_lowercase 432 pub fn to_ascii_lowercase(&self) -> Result<CString, AllocError> { 433 let mut s = self.to_cstring()?; 434 435 s.make_ascii_lowercase(); 436 437 Ok(s) 438 } 439 440 /// Returns a copy of this [`CString`] where each character is mapped to its 441 /// ASCII upper case equivalent. 442 /// 443 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', 444 /// but non-ASCII letters are unchanged. 445 /// 446 /// To uppercase the value in-place, use [`make_ascii_uppercase`]. 447 /// 448 /// [`make_ascii_uppercase`]: str::make_ascii_uppercase 449 pub fn to_ascii_uppercase(&self) -> Result<CString, AllocError> { 450 let mut s = self.to_cstring()?; 451 452 s.make_ascii_uppercase(); 453 454 Ok(s) 455 } 456 } 457 458 impl fmt::Display for CStr { 459 /// Formats printable ASCII characters, escaping the rest. 460 /// 461 /// ``` 462 /// # use kernel::c_str; 463 /// # use kernel::prelude::fmt; 464 /// # use kernel::str::CStr; 465 /// # use kernel::str::CString; 466 /// let penguin = c_str!(""); 467 /// let s = CString::try_from_fmt(fmt!("{penguin}"))?; 468 /// assert_eq!(s.to_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); 469 /// 470 /// let ascii = c_str!("so \"cool\""); 471 /// let s = CString::try_from_fmt(fmt!("{ascii}"))?; 472 /// assert_eq!(s.to_bytes_with_nul(), "so \"cool\"\0".as_bytes()); 473 /// # Ok::<(), kernel::error::Error>(()) 474 /// ``` 475 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 476 for &c in self.to_bytes() { 477 if (0x20..0x7f).contains(&c) { 478 // Printable character. 479 f.write_char(c as char)?; 480 } else { 481 write!(f, "\\x{c:02x}")?; 482 } 483 } 484 Ok(()) 485 } 486 } 487 488 impl fmt::Debug for CStr { 489 /// Formats printable ASCII characters with a double quote on either end, escaping the rest. 490 /// 491 /// ``` 492 /// # use kernel::c_str; 493 /// # use kernel::prelude::fmt; 494 /// # use kernel::str::CStr; 495 /// # use kernel::str::CString; 496 /// let penguin = c_str!(""); 497 /// let s = CString::try_from_fmt(fmt!("{penguin:?}"))?; 498 /// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes()); 499 /// 500 /// // Embedded double quotes are escaped. 501 /// let ascii = c_str!("so \"cool\""); 502 /// let s = CString::try_from_fmt(fmt!("{ascii:?}"))?; 503 /// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes()); 504 /// # Ok::<(), kernel::error::Error>(()) 505 /// ``` 506 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 507 f.write_str("\"")?; 508 for &c in self.as_bytes() { 509 match c { 510 // Printable characters. 511 b'\"' => f.write_str("\\\"")?, 512 0x20..=0x7e => f.write_char(c as char)?, 513 _ => write!(f, "\\x{c:02x}")?, 514 } 515 } 516 f.write_str("\"") 517 } 518 } 519 520 impl AsRef<BStr> for CStr { 521 #[inline] 522 fn as_ref(&self) -> &BStr { 523 BStr::from_bytes(self.as_bytes()) 524 } 525 } 526 527 impl Deref for CStr { 528 type Target = BStr; 529 530 #[inline] 531 fn deref(&self) -> &Self::Target { 532 self.as_ref() 533 } 534 } 535 536 impl Index<ops::RangeFrom<usize>> for CStr { 537 type Output = CStr; 538 539 #[inline] 540 fn index(&self, index: ops::RangeFrom<usize>) -> &Self::Output { 541 // Delegate bounds checking to slice. 542 // Assign to _ to mute clippy's unnecessary operation warning. 543 let _ = &self.as_bytes()[index.start..]; 544 // SAFETY: We just checked the bounds. 545 unsafe { Self::from_bytes_with_nul_unchecked(&self.0[index.start..]) } 546 } 547 } 548 549 impl Index<ops::RangeFull> for CStr { 550 type Output = CStr; 551 552 #[inline] 553 fn index(&self, _index: ops::RangeFull) -> &Self::Output { 554 self 555 } 556 } 557 558 mod private { 559 use core::ops; 560 561 // Marker trait for index types that can be forward to `BStr`. 562 pub trait CStrIndex {} 563 564 impl CStrIndex for usize {} 565 impl CStrIndex for ops::Range<usize> {} 566 impl CStrIndex for ops::RangeInclusive<usize> {} 567 impl CStrIndex for ops::RangeToInclusive<usize> {} 568 } 569 570 impl<Idx> Index<Idx> for CStr 571 where 572 Idx: private::CStrIndex, 573 BStr: Index<Idx>, 574 { 575 type Output = <BStr as Index<Idx>>::Output; 576 577 #[inline] 578 fn index(&self, index: Idx) -> &Self::Output { 579 &self.as_ref()[index] 580 } 581 } 582 583 /// Creates a new [`CStr`] from a string literal. 584 /// 585 /// The string literal should not contain any `NUL` bytes. 586 /// 587 /// # Examples 588 /// 589 /// ``` 590 /// # use kernel::c_str; 591 /// # use kernel::str::CStr; 592 /// const MY_CSTR: &CStr = c_str!("My awesome CStr!"); 593 /// ``` 594 #[macro_export] 595 macro_rules! c_str { 596 ($str:expr) => {{ 597 const S: &str = concat!($str, "\0"); 598 const C: &$crate::str::CStr = match $crate::str::CStr::from_bytes_with_nul(S.as_bytes()) { 599 Ok(v) => v, 600 Err(_) => panic!("string contains interior NUL"), 601 }; 602 C 603 }}; 604 } 605 606 #[kunit_tests(rust_kernel_str)] 607 mod tests { 608 use super::*; 609 610 macro_rules! format { 611 ($($f:tt)*) => ({ 612 CString::try_from_fmt(fmt!($($f)*))?.to_str()? 613 }) 614 } 615 616 const ALL_ASCII_CHARS: &str = 617 "\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\ 618 \\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f \ 619 !\"#$%&'()*+,-./0123456789:;<=>?@\ 620 ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\ 621 \\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\ 622 \\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\ 623 \\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\ 624 \\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\ 625 \\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\ 626 \\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\ 627 \\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\ 628 \\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff"; 629 630 #[test] 631 fn test_cstr_to_str() -> Result { 632 let good_bytes = b"\xf0\x9f\xa6\x80\0"; 633 let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?; 634 let checked_str = checked_cstr.to_str()?; 635 assert_eq!(checked_str, ""); 636 Ok(()) 637 } 638 639 #[test] 640 fn test_cstr_to_str_invalid_utf8() -> Result { 641 let bad_bytes = b"\xc3\x28\0"; 642 let checked_cstr = CStr::from_bytes_with_nul(bad_bytes)?; 643 assert!(checked_cstr.to_str().is_err()); 644 Ok(()) 645 } 646 647 #[test] 648 fn test_cstr_as_str_unchecked() -> Result { 649 let good_bytes = b"\xf0\x9f\x90\xA7\0"; 650 let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?; 651 // SAFETY: The contents come from a string literal which contains valid UTF-8. 652 let unchecked_str = unsafe { checked_cstr.as_str_unchecked() }; 653 assert_eq!(unchecked_str, ""); 654 Ok(()) 655 } 656 657 #[test] 658 fn test_cstr_display() -> Result { 659 let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?; 660 assert_eq!(format!("{hello_world}"), "hello, world!"); 661 let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?; 662 assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a"); 663 let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?; 664 assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); 665 let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?; 666 assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); 667 Ok(()) 668 } 669 670 #[test] 671 fn test_cstr_display_all_bytes() -> Result { 672 let mut bytes: [u8; 256] = [0; 256]; 673 // fill `bytes` with [1..=255] + [0] 674 for i in u8::MIN..=u8::MAX { 675 bytes[i as usize] = i.wrapping_add(1); 676 } 677 let cstr = CStr::from_bytes_with_nul(&bytes)?; 678 assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS); 679 Ok(()) 680 } 681 682 #[test] 683 fn test_cstr_debug() -> Result { 684 let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?; 685 assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); 686 let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?; 687 assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\""); 688 let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?; 689 assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); 690 let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?; 691 assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); 692 Ok(()) 693 } 694 695 #[test] 696 fn test_bstr_display() -> Result { 697 let hello_world = BStr::from_bytes(b"hello, world!"); 698 assert_eq!(format!("{hello_world}"), "hello, world!"); 699 let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); 700 assert_eq!(format!("{escapes}"), "_\\t_\\n_\\r_\\_'_\"_"); 701 let others = BStr::from_bytes(b"\x01"); 702 assert_eq!(format!("{others}"), "\\x01"); 703 let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); 704 assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); 705 let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); 706 assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); 707 Ok(()) 708 } 709 710 #[test] 711 fn test_bstr_debug() -> Result { 712 let hello_world = BStr::from_bytes(b"hello, world!"); 713 assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); 714 let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); 715 assert_eq!(format!("{escapes:?}"), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); 716 let others = BStr::from_bytes(b"\x01"); 717 assert_eq!(format!("{others:?}"), "\"\\x01\""); 718 let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); 719 assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); 720 let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); 721 assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); 722 Ok(()) 723 } 724 } 725 726 /// Allows formatting of [`fmt::Arguments`] into a raw buffer. 727 /// 728 /// It does not fail if callers write past the end of the buffer so that they can calculate the 729 /// size required to fit everything. 730 /// 731 /// # Invariants 732 /// 733 /// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos` 734 /// is less than `end`. 735 pub(crate) struct RawFormatter { 736 // Use `usize` to use `saturating_*` functions. 737 beg: usize, 738 pos: usize, 739 end: usize, 740 } 741 742 impl RawFormatter { 743 /// Creates a new instance of [`RawFormatter`] with an empty buffer. 744 fn new() -> Self { 745 // INVARIANT: The buffer is empty, so the region that needs to be writable is empty. 746 Self { 747 beg: 0, 748 pos: 0, 749 end: 0, 750 } 751 } 752 753 /// Creates a new instance of [`RawFormatter`] with the given buffer pointers. 754 /// 755 /// # Safety 756 /// 757 /// If `pos` is less than `end`, then the region between `pos` (inclusive) and `end` 758 /// (exclusive) must be valid for writes for the lifetime of the returned [`RawFormatter`]. 759 pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self { 760 // INVARIANT: The safety requirements guarantee the type invariants. 761 Self { 762 beg: pos as usize, 763 pos: pos as usize, 764 end: end as usize, 765 } 766 } 767 768 /// Creates a new instance of [`RawFormatter`] with the given buffer. 769 /// 770 /// # Safety 771 /// 772 /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes 773 /// for the lifetime of the returned [`RawFormatter`]. 774 pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { 775 let pos = buf as usize; 776 // INVARIANT: We ensure that `end` is never less than `buf`, and the safety requirements 777 // guarantees that the memory region is valid for writes. 778 Self { 779 pos, 780 beg: pos, 781 end: pos.saturating_add(len), 782 } 783 } 784 785 /// Returns the current insert position. 786 /// 787 /// N.B. It may point to invalid memory. 788 pub(crate) fn pos(&self) -> *mut u8 { 789 self.pos as *mut u8 790 } 791 792 /// Returns the number of bytes written to the formatter. 793 pub(crate) fn bytes_written(&self) -> usize { 794 self.pos - self.beg 795 } 796 } 797 798 impl fmt::Write for RawFormatter { 799 fn write_str(&mut self, s: &str) -> fmt::Result { 800 // `pos` value after writing `len` bytes. This does not have to be bounded by `end`, but we 801 // don't want it to wrap around to 0. 802 let pos_new = self.pos.saturating_add(s.len()); 803 804 // Amount that we can copy. `saturating_sub` ensures we get 0 if `pos` goes past `end`. 805 let len_to_copy = core::cmp::min(pos_new, self.end).saturating_sub(self.pos); 806 807 if len_to_copy > 0 { 808 // SAFETY: If `len_to_copy` is non-zero, then we know `pos` has not gone past `end` 809 // yet, so it is valid for write per the type invariants. 810 unsafe { 811 core::ptr::copy_nonoverlapping( 812 s.as_bytes().as_ptr(), 813 self.pos as *mut u8, 814 len_to_copy, 815 ) 816 }; 817 } 818 819 self.pos = pos_new; 820 Ok(()) 821 } 822 } 823 824 /// Allows formatting of [`fmt::Arguments`] into a raw buffer. 825 /// 826 /// Fails if callers attempt to write more than will fit in the buffer. 827 pub(crate) struct Formatter(RawFormatter); 828 829 impl Formatter { 830 /// Creates a new instance of [`Formatter`] with the given buffer. 831 /// 832 /// # Safety 833 /// 834 /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes 835 /// for the lifetime of the returned [`Formatter`]. 836 pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { 837 // SAFETY: The safety requirements of this function satisfy those of the callee. 838 Self(unsafe { RawFormatter::from_buffer(buf, len) }) 839 } 840 } 841 842 impl Deref for Formatter { 843 type Target = RawFormatter; 844 845 fn deref(&self) -> &Self::Target { 846 &self.0 847 } 848 } 849 850 impl fmt::Write for Formatter { 851 fn write_str(&mut self, s: &str) -> fmt::Result { 852 self.0.write_str(s)?; 853 854 // Fail the request if we go past the end of the buffer. 855 if self.0.pos > self.0.end { 856 Err(fmt::Error) 857 } else { 858 Ok(()) 859 } 860 } 861 } 862 863 /// An owned string that is guaranteed to have exactly one `NUL` byte, which is at the end. 864 /// 865 /// Used for interoperability with kernel APIs that take C strings. 866 /// 867 /// # Invariants 868 /// 869 /// The string is always `NUL`-terminated and contains no other `NUL` bytes. 870 /// 871 /// # Examples 872 /// 873 /// ``` 874 /// use kernel::{str::CString, prelude::fmt}; 875 /// 876 /// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?; 877 /// assert_eq!(s.to_bytes_with_nul(), "abc1020\0".as_bytes()); 878 /// 879 /// let tmp = "testing"; 880 /// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123))?; 881 /// assert_eq!(s.to_bytes_with_nul(), "testing123\0".as_bytes()); 882 /// 883 /// // This fails because it has an embedded `NUL` byte. 884 /// let s = CString::try_from_fmt(fmt!("a\0b{}", 123)); 885 /// assert_eq!(s.is_ok(), false); 886 /// # Ok::<(), kernel::error::Error>(()) 887 /// ``` 888 pub struct CString { 889 buf: KVec<u8>, 890 } 891 892 impl CString { 893 /// Creates an instance of [`CString`] from the given formatted arguments. 894 pub fn try_from_fmt(args: fmt::Arguments<'_>) -> Result<Self, Error> { 895 // Calculate the size needed (formatted string plus `NUL` terminator). 896 let mut f = RawFormatter::new(); 897 f.write_fmt(args)?; 898 f.write_str("\0")?; 899 let size = f.bytes_written(); 900 901 // Allocate a vector with the required number of bytes, and write to it. 902 let mut buf = KVec::with_capacity(size, GFP_KERNEL)?; 903 // SAFETY: The buffer stored in `buf` is at least of size `size` and is valid for writes. 904 let mut f = unsafe { Formatter::from_buffer(buf.as_mut_ptr(), size) }; 905 f.write_fmt(args)?; 906 f.write_str("\0")?; 907 908 // SAFETY: The number of bytes that can be written to `f` is bounded by `size`, which is 909 // `buf`'s capacity. The contents of the buffer have been initialised by writes to `f`. 910 unsafe { buf.inc_len(f.bytes_written()) }; 911 912 // Check that there are no `NUL` bytes before the end. 913 // SAFETY: The buffer is valid for read because `f.bytes_written()` is bounded by `size` 914 // (which the minimum buffer size) and is non-zero (we wrote at least the `NUL` terminator) 915 // so `f.bytes_written() - 1` doesn't underflow. 916 let ptr = unsafe { bindings::memchr(buf.as_ptr().cast(), 0, f.bytes_written() - 1) }; 917 if !ptr.is_null() { 918 return Err(EINVAL); 919 } 920 921 // INVARIANT: We wrote the `NUL` terminator and checked above that no other `NUL` bytes 922 // exist in the buffer. 923 Ok(Self { buf }) 924 } 925 } 926 927 impl Deref for CString { 928 type Target = CStr; 929 930 fn deref(&self) -> &Self::Target { 931 // SAFETY: The type invariants guarantee that the string is `NUL`-terminated and that no 932 // other `NUL` bytes exist. 933 unsafe { CStr::from_bytes_with_nul_unchecked(self.buf.as_slice()) } 934 } 935 } 936 937 impl DerefMut for CString { 938 fn deref_mut(&mut self) -> &mut Self::Target { 939 // SAFETY: A `CString` is always NUL-terminated and contains no other 940 // NUL bytes. 941 unsafe { CStr::from_bytes_with_nul_unchecked_mut(self.buf.as_mut_slice()) } 942 } 943 } 944 945 impl<'a> TryFrom<&'a CStr> for CString { 946 type Error = AllocError; 947 948 fn try_from(cstr: &'a CStr) -> Result<CString, AllocError> { 949 let mut buf = KVec::new(); 950 951 buf.extend_from_slice(cstr.to_bytes_with_nul(), GFP_KERNEL)?; 952 953 // INVARIANT: The `CStr` and `CString` types have the same invariants for 954 // the string data, and we copied it over without changes. 955 Ok(CString { buf }) 956 } 957 } 958 959 impl fmt::Debug for CString { 960 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 961 fmt::Debug::fmt(&**self, f) 962 } 963 } 964