1 // SPDX-License-Identifier: GPL-2.0 2 3 //! String representations. 4 5 use crate::alloc::{flags::*, AllocError, KVec}; 6 use core::fmt::{self, Write}; 7 use core::ops::{self, Deref, DerefMut, Index}; 8 9 use crate::prelude::*; 10 11 /// Byte string without UTF-8 validity guarantee. 12 #[repr(transparent)] 13 pub struct BStr([u8]); 14 15 impl BStr { 16 /// Returns the length of this string. 17 #[inline] len(&self) -> usize18 pub const fn len(&self) -> usize { 19 self.0.len() 20 } 21 22 /// Returns `true` if the string is empty. 23 #[inline] is_empty(&self) -> bool24 pub const fn is_empty(&self) -> bool { 25 self.len() == 0 26 } 27 28 /// Creates a [`BStr`] from a `[u8]`. 29 #[inline] from_bytes(bytes: &[u8]) -> &Self30 pub const fn from_bytes(bytes: &[u8]) -> &Self { 31 // SAFETY: `BStr` is transparent to `[u8]`. 32 unsafe { &*(bytes as *const [u8] as *const BStr) } 33 } 34 35 /// Strip a prefix from `self`. Delegates to [`slice::strip_prefix`]. 36 /// 37 /// # Examples 38 /// 39 /// ``` 40 /// # use kernel::b_str; 41 /// assert_eq!(Some(b_str!("bar")), b_str!("foobar").strip_prefix(b_str!("foo"))); 42 /// assert_eq!(None, b_str!("foobar").strip_prefix(b_str!("bar"))); 43 /// assert_eq!(Some(b_str!("foobar")), b_str!("foobar").strip_prefix(b_str!(""))); 44 /// assert_eq!(Some(b_str!("")), b_str!("foobar").strip_prefix(b_str!("foobar"))); 45 /// ``` strip_prefix(&self, pattern: impl AsRef<Self>) -> Option<&BStr>46 pub fn strip_prefix(&self, pattern: impl AsRef<Self>) -> Option<&BStr> { 47 self.deref() 48 .strip_prefix(pattern.as_ref().deref()) 49 .map(Self::from_bytes) 50 } 51 } 52 53 impl fmt::Display for BStr { 54 /// Formats printable ASCII characters, escaping the rest. 55 /// 56 /// ``` 57 /// # use kernel::{fmt, b_str, str::{BStr, CString}}; 58 /// let ascii = b_str!("Hello, BStr!"); 59 /// let s = CString::try_from_fmt(fmt!("{}", ascii))?; 60 /// assert_eq!(s.as_bytes(), "Hello, BStr!".as_bytes()); 61 /// 62 /// let non_ascii = b_str!(""); 63 /// let s = CString::try_from_fmt(fmt!("{}", non_ascii))?; 64 /// assert_eq!(s.as_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes()); 65 /// # Ok::<(), kernel::error::Error>(()) 66 /// ``` fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result67 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 68 for &b in &self.0 { 69 match b { 70 // Common escape codes. 71 b'\t' => f.write_str("\\t")?, 72 b'\n' => f.write_str("\\n")?, 73 b'\r' => f.write_str("\\r")?, 74 // Printable characters. 75 0x20..=0x7e => f.write_char(b as char)?, 76 _ => write!(f, "\\x{b:02x}")?, 77 } 78 } 79 Ok(()) 80 } 81 } 82 83 impl fmt::Debug for BStr { 84 /// Formats printable ASCII characters with a double quote on either end, 85 /// escaping the rest. 86 /// 87 /// ``` 88 /// # use kernel::{fmt, b_str, str::{BStr, CString}}; 89 /// // Embedded double quotes are escaped. 90 /// let ascii = b_str!("Hello, \"BStr\"!"); 91 /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?; 92 /// assert_eq!(s.as_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes()); 93 /// 94 /// let non_ascii = b_str!(""); 95 /// let s = CString::try_from_fmt(fmt!("{:?}", non_ascii))?; 96 /// assert_eq!(s.as_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes()); 97 /// # Ok::<(), kernel::error::Error>(()) 98 /// ``` fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result99 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 100 f.write_char('"')?; 101 for &b in &self.0 { 102 match b { 103 // Common escape codes. 104 b'\t' => f.write_str("\\t")?, 105 b'\n' => f.write_str("\\n")?, 106 b'\r' => f.write_str("\\r")?, 107 // String escape characters. 108 b'\"' => f.write_str("\\\"")?, 109 b'\\' => f.write_str("\\\\")?, 110 // Printable characters. 111 0x20..=0x7e => f.write_char(b as char)?, 112 _ => write!(f, "\\x{b:02x}")?, 113 } 114 } 115 f.write_char('"') 116 } 117 } 118 119 impl Deref for BStr { 120 type Target = [u8]; 121 122 #[inline] deref(&self) -> &Self::Target123 fn deref(&self) -> &Self::Target { 124 &self.0 125 } 126 } 127 128 impl PartialEq for BStr { eq(&self, other: &Self) -> bool129 fn eq(&self, other: &Self) -> bool { 130 self.deref().eq(other.deref()) 131 } 132 } 133 134 impl<Idx> Index<Idx> for BStr 135 where 136 [u8]: Index<Idx, Output = [u8]>, 137 { 138 type Output = Self; 139 index(&self, index: Idx) -> &Self::Output140 fn index(&self, index: Idx) -> &Self::Output { 141 BStr::from_bytes(&self.0[index]) 142 } 143 } 144 145 impl AsRef<BStr> for [u8] { as_ref(&self) -> &BStr146 fn as_ref(&self) -> &BStr { 147 BStr::from_bytes(self) 148 } 149 } 150 151 impl AsRef<BStr> for BStr { as_ref(&self) -> &BStr152 fn as_ref(&self) -> &BStr { 153 self 154 } 155 } 156 157 /// Creates a new [`BStr`] from a string literal. 158 /// 159 /// `b_str!` converts the supplied string literal to byte string, so non-ASCII 160 /// characters can be included. 161 /// 162 /// # Examples 163 /// 164 /// ``` 165 /// # use kernel::b_str; 166 /// # use kernel::str::BStr; 167 /// const MY_BSTR: &BStr = b_str!("My awesome BStr!"); 168 /// ``` 169 #[macro_export] 170 macro_rules! b_str { 171 ($str:literal) => {{ 172 const S: &'static str = $str; 173 const C: &'static $crate::str::BStr = $crate::str::BStr::from_bytes(S.as_bytes()); 174 C 175 }}; 176 } 177 178 /// Possible errors when using conversion functions in [`CStr`]. 179 #[derive(Debug, Clone, Copy)] 180 pub enum CStrConvertError { 181 /// Supplied bytes contain an interior `NUL`. 182 InteriorNul, 183 184 /// Supplied bytes are not terminated by `NUL`. 185 NotNulTerminated, 186 } 187 188 impl From<CStrConvertError> for Error { 189 #[inline] from(_: CStrConvertError) -> Error190 fn from(_: CStrConvertError) -> Error { 191 EINVAL 192 } 193 } 194 195 /// A string that is guaranteed to have exactly one `NUL` byte, which is at the 196 /// end. 197 /// 198 /// Used for interoperability with kernel APIs that take C strings. 199 #[repr(transparent)] 200 pub struct CStr([u8]); 201 202 impl CStr { 203 /// Returns the length of this string excluding `NUL`. 204 #[inline] len(&self) -> usize205 pub const fn len(&self) -> usize { 206 self.len_with_nul() - 1 207 } 208 209 /// Returns the length of this string with `NUL`. 210 #[inline] len_with_nul(&self) -> usize211 pub const fn len_with_nul(&self) -> usize { 212 if self.0.is_empty() { 213 // SAFETY: This is one of the invariant of `CStr`. 214 // We add a `unreachable_unchecked` here to hint the optimizer that 215 // the value returned from this function is non-zero. 216 unsafe { core::hint::unreachable_unchecked() }; 217 } 218 self.0.len() 219 } 220 221 /// Returns `true` if the string only includes `NUL`. 222 #[inline] is_empty(&self) -> bool223 pub const fn is_empty(&self) -> bool { 224 self.len() == 0 225 } 226 227 /// Wraps a raw C string pointer. 228 /// 229 /// # Safety 230 /// 231 /// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must 232 /// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr` 233 /// must not be mutated. 234 #[inline] from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self235 pub unsafe fn from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self { 236 // SAFETY: The safety precondition guarantees `ptr` is a valid pointer 237 // to a `NUL`-terminated C string. 238 let len = unsafe { bindings::strlen(ptr) } + 1; 239 // SAFETY: Lifetime guaranteed by the safety precondition. 240 let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len) }; 241 // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`. 242 // As we have added 1 to `len`, the last byte is known to be `NUL`. 243 unsafe { Self::from_bytes_with_nul_unchecked(bytes) } 244 } 245 246 /// Creates a [`CStr`] from a `[u8]`. 247 /// 248 /// The provided slice must be `NUL`-terminated, does not contain any 249 /// interior `NUL` bytes. from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError>250 pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError> { 251 if bytes.is_empty() { 252 return Err(CStrConvertError::NotNulTerminated); 253 } 254 if bytes[bytes.len() - 1] != 0 { 255 return Err(CStrConvertError::NotNulTerminated); 256 } 257 let mut i = 0; 258 // `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking, 259 // while it couldn't optimize away bounds checks for `i < bytes.len() - 1`. 260 while i + 1 < bytes.len() { 261 if bytes[i] == 0 { 262 return Err(CStrConvertError::InteriorNul); 263 } 264 i += 1; 265 } 266 // SAFETY: We just checked that all properties hold. 267 Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) }) 268 } 269 270 /// Creates a [`CStr`] from a `[u8]` without performing any additional 271 /// checks. 272 /// 273 /// # Safety 274 /// 275 /// `bytes` *must* end with a `NUL` byte, and should only have a single 276 /// `NUL` byte (or the string will be truncated). 277 #[inline] from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr278 pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr { 279 // SAFETY: Properties of `bytes` guaranteed by the safety precondition. 280 unsafe { core::mem::transmute(bytes) } 281 } 282 283 /// Creates a mutable [`CStr`] from a `[u8]` without performing any 284 /// additional checks. 285 /// 286 /// # Safety 287 /// 288 /// `bytes` *must* end with a `NUL` byte, and should only have a single 289 /// `NUL` byte (or the string will be truncated). 290 #[inline] from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr291 pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { 292 // SAFETY: Properties of `bytes` guaranteed by the safety precondition. 293 unsafe { &mut *(bytes as *mut [u8] as *mut CStr) } 294 } 295 296 /// Returns a C pointer to the string. 297 #[inline] as_char_ptr(&self) -> *const crate::ffi::c_char298 pub const fn as_char_ptr(&self) -> *const crate::ffi::c_char { 299 self.0.as_ptr() 300 } 301 302 /// Convert the string to a byte slice without the trailing `NUL` byte. 303 #[inline] as_bytes(&self) -> &[u8]304 pub fn as_bytes(&self) -> &[u8] { 305 &self.0[..self.len()] 306 } 307 308 /// Convert the string to a byte slice containing the trailing `NUL` byte. 309 #[inline] as_bytes_with_nul(&self) -> &[u8]310 pub const fn as_bytes_with_nul(&self) -> &[u8] { 311 &self.0 312 } 313 314 /// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8. 315 /// 316 /// If the contents of the [`CStr`] are valid UTF-8 data, this 317 /// function will return the corresponding [`&str`] slice. Otherwise, 318 /// it will return an error with details of where UTF-8 validation failed. 319 /// 320 /// # Examples 321 /// 322 /// ``` 323 /// # use kernel::str::CStr; 324 /// let cstr = CStr::from_bytes_with_nul(b"foo\0")?; 325 /// assert_eq!(cstr.to_str(), Ok("foo")); 326 /// # Ok::<(), kernel::error::Error>(()) 327 /// ``` 328 #[inline] to_str(&self) -> Result<&str, core::str::Utf8Error>329 pub fn to_str(&self) -> Result<&str, core::str::Utf8Error> { 330 core::str::from_utf8(self.as_bytes()) 331 } 332 333 /// Unsafely convert this [`CStr`] into a [`&str`], without checking for 334 /// valid UTF-8. 335 /// 336 /// # Safety 337 /// 338 /// The contents must be valid UTF-8. 339 /// 340 /// # Examples 341 /// 342 /// ``` 343 /// # use kernel::c_str; 344 /// # use kernel::str::CStr; 345 /// let bar = c_str!("ツ"); 346 /// // SAFETY: String literals are guaranteed to be valid UTF-8 347 /// // by the Rust compiler. 348 /// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ"); 349 /// ``` 350 #[inline] as_str_unchecked(&self) -> &str351 pub unsafe fn as_str_unchecked(&self) -> &str { 352 // SAFETY: TODO. 353 unsafe { core::str::from_utf8_unchecked(self.as_bytes()) } 354 } 355 356 /// Convert this [`CStr`] into a [`CString`] by allocating memory and 357 /// copying over the string data. to_cstring(&self) -> Result<CString, AllocError>358 pub fn to_cstring(&self) -> Result<CString, AllocError> { 359 CString::try_from(self) 360 } 361 362 /// Converts this [`CStr`] to its ASCII lower case equivalent in-place. 363 /// 364 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', 365 /// but non-ASCII letters are unchanged. 366 /// 367 /// To return a new lowercased value without modifying the existing one, use 368 /// [`to_ascii_lowercase()`]. 369 /// 370 /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase make_ascii_lowercase(&mut self)371 pub fn make_ascii_lowercase(&mut self) { 372 // INVARIANT: This doesn't introduce or remove NUL bytes in the C 373 // string. 374 self.0.make_ascii_lowercase(); 375 } 376 377 /// Converts this [`CStr`] to its ASCII upper case equivalent in-place. 378 /// 379 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', 380 /// but non-ASCII letters are unchanged. 381 /// 382 /// To return a new uppercased value without modifying the existing one, use 383 /// [`to_ascii_uppercase()`]. 384 /// 385 /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase make_ascii_uppercase(&mut self)386 pub fn make_ascii_uppercase(&mut self) { 387 // INVARIANT: This doesn't introduce or remove NUL bytes in the C 388 // string. 389 self.0.make_ascii_uppercase(); 390 } 391 392 /// Returns a copy of this [`CString`] where each character is mapped to its 393 /// ASCII lower case equivalent. 394 /// 395 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', 396 /// but non-ASCII letters are unchanged. 397 /// 398 /// To lowercase the value in-place, use [`make_ascii_lowercase`]. 399 /// 400 /// [`make_ascii_lowercase`]: str::make_ascii_lowercase to_ascii_lowercase(&self) -> Result<CString, AllocError>401 pub fn to_ascii_lowercase(&self) -> Result<CString, AllocError> { 402 let mut s = self.to_cstring()?; 403 404 s.make_ascii_lowercase(); 405 406 Ok(s) 407 } 408 409 /// Returns a copy of this [`CString`] where each character is mapped to its 410 /// ASCII upper case equivalent. 411 /// 412 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', 413 /// but non-ASCII letters are unchanged. 414 /// 415 /// To uppercase the value in-place, use [`make_ascii_uppercase`]. 416 /// 417 /// [`make_ascii_uppercase`]: str::make_ascii_uppercase to_ascii_uppercase(&self) -> Result<CString, AllocError>418 pub fn to_ascii_uppercase(&self) -> Result<CString, AllocError> { 419 let mut s = self.to_cstring()?; 420 421 s.make_ascii_uppercase(); 422 423 Ok(s) 424 } 425 } 426 427 impl fmt::Display for CStr { 428 /// Formats printable ASCII characters, escaping the rest. 429 /// 430 /// ``` 431 /// # use kernel::c_str; 432 /// # use kernel::fmt; 433 /// # use kernel::str::CStr; 434 /// # use kernel::str::CString; 435 /// let penguin = c_str!(""); 436 /// let s = CString::try_from_fmt(fmt!("{}", penguin))?; 437 /// assert_eq!(s.as_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes()); 438 /// 439 /// let ascii = c_str!("so \"cool\""); 440 /// let s = CString::try_from_fmt(fmt!("{}", ascii))?; 441 /// assert_eq!(s.as_bytes_with_nul(), "so \"cool\"\0".as_bytes()); 442 /// # Ok::<(), kernel::error::Error>(()) 443 /// ``` fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result444 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 445 for &c in self.as_bytes() { 446 if (0x20..0x7f).contains(&c) { 447 // Printable character. 448 f.write_char(c as char)?; 449 } else { 450 write!(f, "\\x{c:02x}")?; 451 } 452 } 453 Ok(()) 454 } 455 } 456 457 impl fmt::Debug for CStr { 458 /// Formats printable ASCII characters with a double quote on either end, escaping the rest. 459 /// 460 /// ``` 461 /// # use kernel::c_str; 462 /// # use kernel::fmt; 463 /// # use kernel::str::CStr; 464 /// # use kernel::str::CString; 465 /// let penguin = c_str!(""); 466 /// let s = CString::try_from_fmt(fmt!("{:?}", penguin))?; 467 /// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes()); 468 /// 469 /// // Embedded double quotes are escaped. 470 /// let ascii = c_str!("so \"cool\""); 471 /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?; 472 /// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes()); 473 /// # Ok::<(), kernel::error::Error>(()) 474 /// ``` fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result475 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 476 f.write_str("\"")?; 477 for &c in self.as_bytes() { 478 match c { 479 // Printable characters. 480 b'\"' => f.write_str("\\\"")?, 481 0x20..=0x7e => f.write_char(c as char)?, 482 _ => write!(f, "\\x{c:02x}")?, 483 } 484 } 485 f.write_str("\"") 486 } 487 } 488 489 impl AsRef<BStr> for CStr { 490 #[inline] as_ref(&self) -> &BStr491 fn as_ref(&self) -> &BStr { 492 BStr::from_bytes(self.as_bytes()) 493 } 494 } 495 496 impl Deref for CStr { 497 type Target = BStr; 498 499 #[inline] deref(&self) -> &Self::Target500 fn deref(&self) -> &Self::Target { 501 self.as_ref() 502 } 503 } 504 505 impl Index<ops::RangeFrom<usize>> for CStr { 506 type Output = CStr; 507 508 #[inline] index(&self, index: ops::RangeFrom<usize>) -> &Self::Output509 fn index(&self, index: ops::RangeFrom<usize>) -> &Self::Output { 510 // Delegate bounds checking to slice. 511 // Assign to _ to mute clippy's unnecessary operation warning. 512 let _ = &self.as_bytes()[index.start..]; 513 // SAFETY: We just checked the bounds. 514 unsafe { Self::from_bytes_with_nul_unchecked(&self.0[index.start..]) } 515 } 516 } 517 518 impl Index<ops::RangeFull> for CStr { 519 type Output = CStr; 520 521 #[inline] index(&self, _index: ops::RangeFull) -> &Self::Output522 fn index(&self, _index: ops::RangeFull) -> &Self::Output { 523 self 524 } 525 } 526 527 mod private { 528 use core::ops; 529 530 // Marker trait for index types that can be forward to `BStr`. 531 pub trait CStrIndex {} 532 533 impl CStrIndex for usize {} 534 impl CStrIndex for ops::Range<usize> {} 535 impl CStrIndex for ops::RangeInclusive<usize> {} 536 impl CStrIndex for ops::RangeToInclusive<usize> {} 537 } 538 539 impl<Idx> Index<Idx> for CStr 540 where 541 Idx: private::CStrIndex, 542 BStr: Index<Idx>, 543 { 544 type Output = <BStr as Index<Idx>>::Output; 545 546 #[inline] index(&self, index: Idx) -> &Self::Output547 fn index(&self, index: Idx) -> &Self::Output { 548 &self.as_ref()[index] 549 } 550 } 551 552 /// Creates a new [`CStr`] from a string literal. 553 /// 554 /// The string literal should not contain any `NUL` bytes. 555 /// 556 /// # Examples 557 /// 558 /// ``` 559 /// # use kernel::c_str; 560 /// # use kernel::str::CStr; 561 /// const MY_CSTR: &CStr = c_str!("My awesome CStr!"); 562 /// ``` 563 #[macro_export] 564 macro_rules! c_str { 565 ($str:expr) => {{ 566 const S: &str = concat!($str, "\0"); 567 const C: &$crate::str::CStr = match $crate::str::CStr::from_bytes_with_nul(S.as_bytes()) { 568 Ok(v) => v, 569 Err(_) => panic!("string contains interior NUL"), 570 }; 571 C 572 }}; 573 } 574 575 #[kunit_tests(rust_kernel_str)] 576 mod tests { 577 use super::*; 578 579 macro_rules! format { 580 ($($f:tt)*) => ({ 581 CString::try_from_fmt(::kernel::fmt!($($f)*))?.to_str()? 582 }) 583 } 584 585 const ALL_ASCII_CHARS: &str = 586 "\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\ 587 \\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f \ 588 !\"#$%&'()*+,-./0123456789:;<=>?@\ 589 ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\ 590 \\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\ 591 \\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\ 592 \\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\ 593 \\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\ 594 \\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\ 595 \\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\ 596 \\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\ 597 \\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff"; 598 599 #[test] test_cstr_to_str() -> Result600 fn test_cstr_to_str() -> Result { 601 let good_bytes = b"\xf0\x9f\xa6\x80\0"; 602 let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?; 603 let checked_str = checked_cstr.to_str()?; 604 assert_eq!(checked_str, ""); 605 Ok(()) 606 } 607 608 #[test] test_cstr_to_str_invalid_utf8() -> Result609 fn test_cstr_to_str_invalid_utf8() -> Result { 610 let bad_bytes = b"\xc3\x28\0"; 611 let checked_cstr = CStr::from_bytes_with_nul(bad_bytes)?; 612 assert!(checked_cstr.to_str().is_err()); 613 Ok(()) 614 } 615 616 #[test] test_cstr_as_str_unchecked() -> Result617 fn test_cstr_as_str_unchecked() -> Result { 618 let good_bytes = b"\xf0\x9f\x90\xA7\0"; 619 let checked_cstr = CStr::from_bytes_with_nul(good_bytes)?; 620 // SAFETY: The contents come from a string literal which contains valid UTF-8. 621 let unchecked_str = unsafe { checked_cstr.as_str_unchecked() }; 622 assert_eq!(unchecked_str, ""); 623 Ok(()) 624 } 625 626 #[test] test_cstr_display() -> Result627 fn test_cstr_display() -> Result { 628 let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?; 629 assert_eq!(format!("{hello_world}"), "hello, world!"); 630 let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?; 631 assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a"); 632 let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?; 633 assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); 634 let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?; 635 assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); 636 Ok(()) 637 } 638 639 #[test] test_cstr_display_all_bytes() -> Result640 fn test_cstr_display_all_bytes() -> Result { 641 let mut bytes: [u8; 256] = [0; 256]; 642 // fill `bytes` with [1..=255] + [0] 643 for i in u8::MIN..=u8::MAX { 644 bytes[i as usize] = i.wrapping_add(1); 645 } 646 let cstr = CStr::from_bytes_with_nul(&bytes)?; 647 assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS); 648 Ok(()) 649 } 650 651 #[test] test_cstr_debug() -> Result652 fn test_cstr_debug() -> Result { 653 let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0")?; 654 assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); 655 let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0")?; 656 assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\""); 657 let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0")?; 658 assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); 659 let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0")?; 660 assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); 661 Ok(()) 662 } 663 664 #[test] test_bstr_display() -> Result665 fn test_bstr_display() -> Result { 666 let hello_world = BStr::from_bytes(b"hello, world!"); 667 assert_eq!(format!("{hello_world}"), "hello, world!"); 668 let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); 669 assert_eq!(format!("{escapes}"), "_\\t_\\n_\\r_\\_'_\"_"); 670 let others = BStr::from_bytes(b"\x01"); 671 assert_eq!(format!("{others}"), "\\x01"); 672 let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); 673 assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); 674 let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); 675 assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); 676 Ok(()) 677 } 678 679 #[test] test_bstr_debug() -> Result680 fn test_bstr_debug() -> Result { 681 let hello_world = BStr::from_bytes(b"hello, world!"); 682 assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); 683 let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); 684 assert_eq!(format!("{escapes:?}"), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); 685 let others = BStr::from_bytes(b"\x01"); 686 assert_eq!(format!("{others:?}"), "\"\\x01\""); 687 let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); 688 assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); 689 let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); 690 assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); 691 Ok(()) 692 } 693 } 694 695 /// Allows formatting of [`fmt::Arguments`] into a raw buffer. 696 /// 697 /// It does not fail if callers write past the end of the buffer so that they can calculate the 698 /// size required to fit everything. 699 /// 700 /// # Invariants 701 /// 702 /// The memory region between `pos` (inclusive) and `end` (exclusive) is valid for writes if `pos` 703 /// is less than `end`. 704 pub(crate) struct RawFormatter { 705 // Use `usize` to use `saturating_*` functions. 706 beg: usize, 707 pos: usize, 708 end: usize, 709 } 710 711 impl RawFormatter { 712 /// Creates a new instance of [`RawFormatter`] with an empty buffer. new() -> Self713 fn new() -> Self { 714 // INVARIANT: The buffer is empty, so the region that needs to be writable is empty. 715 Self { 716 beg: 0, 717 pos: 0, 718 end: 0, 719 } 720 } 721 722 /// Creates a new instance of [`RawFormatter`] with the given buffer pointers. 723 /// 724 /// # Safety 725 /// 726 /// If `pos` is less than `end`, then the region between `pos` (inclusive) and `end` 727 /// (exclusive) must be valid for writes for the lifetime of the returned [`RawFormatter`]. from_ptrs(pos: *mut u8, end: *mut u8) -> Self728 pub(crate) unsafe fn from_ptrs(pos: *mut u8, end: *mut u8) -> Self { 729 // INVARIANT: The safety requirements guarantee the type invariants. 730 Self { 731 beg: pos as _, 732 pos: pos as _, 733 end: end as _, 734 } 735 } 736 737 /// Creates a new instance of [`RawFormatter`] with the given buffer. 738 /// 739 /// # Safety 740 /// 741 /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes 742 /// for the lifetime of the returned [`RawFormatter`]. from_buffer(buf: *mut u8, len: usize) -> Self743 pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { 744 let pos = buf as usize; 745 // INVARIANT: We ensure that `end` is never less than `buf`, and the safety requirements 746 // guarantees that the memory region is valid for writes. 747 Self { 748 pos, 749 beg: pos, 750 end: pos.saturating_add(len), 751 } 752 } 753 754 /// Returns the current insert position. 755 /// 756 /// N.B. It may point to invalid memory. pos(&self) -> *mut u8757 pub(crate) fn pos(&self) -> *mut u8 { 758 self.pos as _ 759 } 760 761 /// Returns the number of bytes written to the formatter. bytes_written(&self) -> usize762 pub(crate) fn bytes_written(&self) -> usize { 763 self.pos - self.beg 764 } 765 } 766 767 impl fmt::Write for RawFormatter { write_str(&mut self, s: &str) -> fmt::Result768 fn write_str(&mut self, s: &str) -> fmt::Result { 769 // `pos` value after writing `len` bytes. This does not have to be bounded by `end`, but we 770 // don't want it to wrap around to 0. 771 let pos_new = self.pos.saturating_add(s.len()); 772 773 // Amount that we can copy. `saturating_sub` ensures we get 0 if `pos` goes past `end`. 774 let len_to_copy = core::cmp::min(pos_new, self.end).saturating_sub(self.pos); 775 776 if len_to_copy > 0 { 777 // SAFETY: If `len_to_copy` is non-zero, then we know `pos` has not gone past `end` 778 // yet, so it is valid for write per the type invariants. 779 unsafe { 780 core::ptr::copy_nonoverlapping( 781 s.as_bytes().as_ptr(), 782 self.pos as *mut u8, 783 len_to_copy, 784 ) 785 }; 786 } 787 788 self.pos = pos_new; 789 Ok(()) 790 } 791 } 792 793 /// Allows formatting of [`fmt::Arguments`] into a raw buffer. 794 /// 795 /// Fails if callers attempt to write more than will fit in the buffer. 796 pub(crate) struct Formatter(RawFormatter); 797 798 impl Formatter { 799 /// Creates a new instance of [`Formatter`] with the given buffer. 800 /// 801 /// # Safety 802 /// 803 /// The memory region starting at `buf` and extending for `len` bytes must be valid for writes 804 /// for the lifetime of the returned [`Formatter`]. from_buffer(buf: *mut u8, len: usize) -> Self805 pub(crate) unsafe fn from_buffer(buf: *mut u8, len: usize) -> Self { 806 // SAFETY: The safety requirements of this function satisfy those of the callee. 807 Self(unsafe { RawFormatter::from_buffer(buf, len) }) 808 } 809 } 810 811 impl Deref for Formatter { 812 type Target = RawFormatter; 813 deref(&self) -> &Self::Target814 fn deref(&self) -> &Self::Target { 815 &self.0 816 } 817 } 818 819 impl fmt::Write for Formatter { write_str(&mut self, s: &str) -> fmt::Result820 fn write_str(&mut self, s: &str) -> fmt::Result { 821 self.0.write_str(s)?; 822 823 // Fail the request if we go past the end of the buffer. 824 if self.0.pos > self.0.end { 825 Err(fmt::Error) 826 } else { 827 Ok(()) 828 } 829 } 830 } 831 832 /// An owned string that is guaranteed to have exactly one `NUL` byte, which is at the end. 833 /// 834 /// Used for interoperability with kernel APIs that take C strings. 835 /// 836 /// # Invariants 837 /// 838 /// The string is always `NUL`-terminated and contains no other `NUL` bytes. 839 /// 840 /// # Examples 841 /// 842 /// ``` 843 /// use kernel::{str::CString, fmt}; 844 /// 845 /// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?; 846 /// assert_eq!(s.as_bytes_with_nul(), "abc1020\0".as_bytes()); 847 /// 848 /// let tmp = "testing"; 849 /// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123))?; 850 /// assert_eq!(s.as_bytes_with_nul(), "testing123\0".as_bytes()); 851 /// 852 /// // This fails because it has an embedded `NUL` byte. 853 /// let s = CString::try_from_fmt(fmt!("a\0b{}", 123)); 854 /// assert_eq!(s.is_ok(), false); 855 /// # Ok::<(), kernel::error::Error>(()) 856 /// ``` 857 pub struct CString { 858 buf: KVec<u8>, 859 } 860 861 impl CString { 862 /// Creates an instance of [`CString`] from the given formatted arguments. try_from_fmt(args: fmt::Arguments<'_>) -> Result<Self, Error>863 pub fn try_from_fmt(args: fmt::Arguments<'_>) -> Result<Self, Error> { 864 // Calculate the size needed (formatted string plus `NUL` terminator). 865 let mut f = RawFormatter::new(); 866 f.write_fmt(args)?; 867 f.write_str("\0")?; 868 let size = f.bytes_written(); 869 870 // Allocate a vector with the required number of bytes, and write to it. 871 let mut buf = KVec::with_capacity(size, GFP_KERNEL)?; 872 // SAFETY: The buffer stored in `buf` is at least of size `size` and is valid for writes. 873 let mut f = unsafe { Formatter::from_buffer(buf.as_mut_ptr(), size) }; 874 f.write_fmt(args)?; 875 f.write_str("\0")?; 876 877 // SAFETY: The number of bytes that can be written to `f` is bounded by `size`, which is 878 // `buf`'s capacity. The contents of the buffer have been initialised by writes to `f`. 879 unsafe { buf.inc_len(f.bytes_written()) }; 880 881 // Check that there are no `NUL` bytes before the end. 882 // SAFETY: The buffer is valid for read because `f.bytes_written()` is bounded by `size` 883 // (which the minimum buffer size) and is non-zero (we wrote at least the `NUL` terminator) 884 // so `f.bytes_written() - 1` doesn't underflow. 885 let ptr = unsafe { bindings::memchr(buf.as_ptr().cast(), 0, f.bytes_written() - 1) }; 886 if !ptr.is_null() { 887 return Err(EINVAL); 888 } 889 890 // INVARIANT: We wrote the `NUL` terminator and checked above that no other `NUL` bytes 891 // exist in the buffer. 892 Ok(Self { buf }) 893 } 894 } 895 896 impl Deref for CString { 897 type Target = CStr; 898 deref(&self) -> &Self::Target899 fn deref(&self) -> &Self::Target { 900 // SAFETY: The type invariants guarantee that the string is `NUL`-terminated and that no 901 // other `NUL` bytes exist. 902 unsafe { CStr::from_bytes_with_nul_unchecked(self.buf.as_slice()) } 903 } 904 } 905 906 impl DerefMut for CString { deref_mut(&mut self) -> &mut Self::Target907 fn deref_mut(&mut self) -> &mut Self::Target { 908 // SAFETY: A `CString` is always NUL-terminated and contains no other 909 // NUL bytes. 910 unsafe { CStr::from_bytes_with_nul_unchecked_mut(self.buf.as_mut_slice()) } 911 } 912 } 913 914 impl<'a> TryFrom<&'a CStr> for CString { 915 type Error = AllocError; 916 try_from(cstr: &'a CStr) -> Result<CString, AllocError>917 fn try_from(cstr: &'a CStr) -> Result<CString, AllocError> { 918 let mut buf = KVec::new(); 919 920 buf.extend_from_slice(cstr.as_bytes_with_nul(), GFP_KERNEL)?; 921 922 // INVARIANT: The `CStr` and `CString` types have the same invariants for 923 // the string data, and we copied it over without changes. 924 Ok(CString { buf }) 925 } 926 } 927 928 impl fmt::Debug for CString { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result929 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 930 fmt::Debug::fmt(&**self, f) 931 } 932 } 933 934 /// A convenience alias for [`core::format_args`]. 935 #[macro_export] 936 macro_rules! fmt { 937 ($($f:tt)*) => ( ::core::format_args!($($f)*) ) 938 } 939