xref: /linux/lib/string_helpers.c (revision 61cb9ac66b30374c7fd8a8b2a3c4f8f432c72e36)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Helpers for formatting and printing strings
4  *
5  * Copyright 31 August 2008 James Bottomley
6  * Copyright (C) 2013, Intel Corporation
7  */
8 #include <linux/bug.h>
9 #include <linux/kernel.h>
10 #include <linux/math64.h>
11 #include <linux/export.h>
12 #include <linux/ctype.h>
13 #include <linux/errno.h>
14 #include <linux/fs.h>
15 #include <linux/limits.h>
16 #include <linux/mm.h>
17 #include <linux/slab.h>
18 #include <linux/string.h>
19 #include <linux/string_helpers.h>
20 
21 /**
22  * string_get_size - get the size in the specified units
23  * @size:	The size to be converted in blocks
24  * @blk_size:	Size of the block (use 1 for size in bytes)
25  * @units:	units to use (powers of 1000 or 1024)
26  * @buf:	buffer to format to
27  * @len:	length of buffer
28  *
29  * This function returns a string formatted to 3 significant figures
30  * giving the size in the required units.  @buf should have room for
31  * at least 9 bytes and will always be zero terminated.
32  *
33  */
34 void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
35 		     char *buf, int len)
36 {
37 	static const char *const units_10[] = {
38 		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
39 	};
40 	static const char *const units_2[] = {
41 		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
42 	};
43 	static const char *const *const units_str[] = {
44 		[STRING_UNITS_10] = units_10,
45 		[STRING_UNITS_2] = units_2,
46 	};
47 	static const unsigned int divisor[] = {
48 		[STRING_UNITS_10] = 1000,
49 		[STRING_UNITS_2] = 1024,
50 	};
51 	static const unsigned int rounding[] = { 500, 50, 5 };
52 	int i = 0, j;
53 	u32 remainder = 0, sf_cap;
54 	char tmp[8];
55 	const char *unit;
56 
57 	tmp[0] = '\0';
58 
59 	if (blk_size == 0)
60 		size = 0;
61 	if (size == 0)
62 		goto out;
63 
64 	/* This is Napier's algorithm.  Reduce the original block size to
65 	 *
66 	 * coefficient * divisor[units]^i
67 	 *
68 	 * we do the reduction so both coefficients are just under 32 bits so
69 	 * that multiplying them together won't overflow 64 bits and we keep
70 	 * as much precision as possible in the numbers.
71 	 *
72 	 * Note: it's safe to throw away the remainders here because all the
73 	 * precision is in the coefficients.
74 	 */
75 	while (blk_size >> 32) {
76 		do_div(blk_size, divisor[units]);
77 		i++;
78 	}
79 
80 	while (size >> 32) {
81 		do_div(size, divisor[units]);
82 		i++;
83 	}
84 
85 	/* now perform the actual multiplication keeping i as the sum of the
86 	 * two logarithms */
87 	size *= blk_size;
88 
89 	/* and logarithmically reduce it until it's just under the divisor */
90 	while (size >= divisor[units]) {
91 		remainder = do_div(size, divisor[units]);
92 		i++;
93 	}
94 
95 	/* work out in j how many digits of precision we need from the
96 	 * remainder */
97 	sf_cap = size;
98 	for (j = 0; sf_cap*10 < 1000; j++)
99 		sf_cap *= 10;
100 
101 	if (units == STRING_UNITS_2) {
102 		/* express the remainder as a decimal.  It's currently the
103 		 * numerator of a fraction whose denominator is
104 		 * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
105 		remainder *= 1000;
106 		remainder >>= 10;
107 	}
108 
109 	/* add a 5 to the digit below what will be printed to ensure
110 	 * an arithmetical round up and carry it through to size */
111 	remainder += rounding[j];
112 	if (remainder >= 1000) {
113 		remainder -= 1000;
114 		size += 1;
115 	}
116 
117 	if (j) {
118 		snprintf(tmp, sizeof(tmp), ".%03u", remainder);
119 		tmp[j+1] = '\0';
120 	}
121 
122  out:
123 	if (i >= ARRAY_SIZE(units_2))
124 		unit = "UNK";
125 	else
126 		unit = units_str[units][i];
127 
128 	snprintf(buf, len, "%u%s %s", (u32)size,
129 		 tmp, unit);
130 }
131 EXPORT_SYMBOL(string_get_size);
132 
/*
 * Decode the letter of a white-space escape (n, r, t, v, f) found at **src.
 * On a match the decoded byte is stored at **dst, both cursors advance by
 * one and true is returned; otherwise nothing is touched and false is
 * returned.
 */
static bool unescape_space(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case 'f':
		decoded = '\f';
		break;
	case 'n':
		decoded = '\n';
		break;
	case 'r':
		decoded = '\r';
		break;
	case 't':
		decoded = '\t';
		break;
	case 'v':
		decoded = '\v';
		break;
	default:
		return false;
	}

	*(*dst)++ = decoded;
	(*src)++;
	return true;
}
160 
161 static bool unescape_octal(char **src, char **dst)
162 {
163 	char *p = *dst, *q = *src;
164 	u8 num;
165 
166 	if (isodigit(*q) == 0)
167 		return false;
168 
169 	num = (*q++) & 7;
170 	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
171 		num <<= 3;
172 		num += (*q++) & 7;
173 	}
174 	*p = num;
175 	*dst += 1;
176 	*src = q;
177 	return true;
178 }
179 
180 static bool unescape_hex(char **src, char **dst)
181 {
182 	char *p = *dst, *q = *src;
183 	int digit;
184 	u8 num;
185 
186 	if (*q++ != 'x')
187 		return false;
188 
189 	num = digit = hex_to_bin(*q++);
190 	if (digit < 0)
191 		return false;
192 
193 	digit = hex_to_bin(*q);
194 	if (digit >= 0) {
195 		q++;
196 		num = (num << 4) | digit;
197 	}
198 	*p = num;
199 	*dst += 1;
200 	*src = q;
201 	return true;
202 }
203 
/*
 * Decode the character of a "special" escape (double quote, backslash,
 * 'a' for BEL, 'e' for ESC) found at **src.  On a match the decoded byte
 * is stored at **dst, both cursors advance by one and true is returned;
 * otherwise nothing is touched and false is returned.
 */
static bool unescape_special(char **src, char **dst)
{
	const char code = **src;
	char decoded;

	if (code == '\"')
		decoded = '\"';
	else if (code == '\\')
		decoded = '\\';
	else if (code == 'a')
		decoded = '\a';
	else if (code == 'e')
		decoded = '\e';
	else
		return false;

	*(*dst)++ = decoded;
	(*src)++;
	return true;
}
228 
229 /**
230  * string_unescape - unquote characters in the given string
231  * @src:	source buffer (escaped)
232  * @dst:	destination buffer (unescaped)
233  * @size:	size of the destination buffer (0 to unlimit)
234  * @flags:	combination of the flags.
235  *
236  * Description:
237  * The function unquotes characters in the given string.
238  *
239  * Because the size of the output will be the same as or less than the size of
240  * the input, the transformation may be performed in place.
241  *
242  * Caller must provide valid source and destination pointers. Be aware that
243  * destination buffer will always be NULL-terminated. Source string must be
244  * NULL-terminated as well.  The supported flags are::
245  *
246  *	UNESCAPE_SPACE:
247  *		'\f' - form feed
248  *		'\n' - new line
249  *		'\r' - carriage return
250  *		'\t' - horizontal tab
251  *		'\v' - vertical tab
252  *	UNESCAPE_OCTAL:
253  *		'\NNN' - byte with octal value NNN (1 to 3 digits)
254  *	UNESCAPE_HEX:
255  *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
256  *	UNESCAPE_SPECIAL:
257  *		'\"' - double quote
258  *		'\\' - backslash
259  *		'\a' - alert (BEL)
260  *		'\e' - escape
261  *	UNESCAPE_ANY:
262  *		all previous together
263  *
264  * Return:
265  * The amount of the characters processed to the destination buffer excluding
266  * trailing '\0' is returned.
267  */
268 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
269 {
270 	char *out = dst;
271 
272 	while (*src && --size) {
273 		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
274 			src++;
275 			size--;
276 
277 			if (flags & UNESCAPE_SPACE &&
278 					unescape_space(&src, &out))
279 				continue;
280 
281 			if (flags & UNESCAPE_OCTAL &&
282 					unescape_octal(&src, &out))
283 				continue;
284 
285 			if (flags & UNESCAPE_HEX &&
286 					unescape_hex(&src, &out))
287 				continue;
288 
289 			if (flags & UNESCAPE_SPECIAL &&
290 					unescape_special(&src, &out))
291 				continue;
292 
293 			*out++ = '\\';
294 		}
295 		*out++ = *src++;
296 	}
297 	*out = '\0';
298 
299 	return out - dst;
300 }
301 EXPORT_SYMBOL(string_unescape);
302 
/*
 * Emit @c unchanged.  The byte is stored only while the cursor is still
 * before @end, but the cursor always advances so the caller can compute
 * the length the full output would have had.  Always returns true.
 */
static bool escape_passthrough(unsigned char c, char **dst, char *end)
{
	if (*dst < end)
		**dst = c;
	(*dst)++;
	return true;
}
312 
/*
 * Escape a white-space control character (\n, \r, \t, \v, \f) as a
 * backslash followed by its letter.  Bytes are stored only while the
 * cursor is before @end; the cursor advances by two regardless.  Returns
 * false, leaving *dst alone, for any other character.
 */
static bool escape_space(unsigned char c, char **dst, char *end)
{
	char *cursor = *dst;
	unsigned char letter;
	char seq[2];
	int k;

	if (c == '\n')
		letter = 'n';
	else if (c == '\r')
		letter = 'r';
	else if (c == '\t')
		letter = 't';
	else if (c == '\v')
		letter = 'v';
	else if (c == '\f')
		letter = 'f';
	else
		return false;

	seq[0] = '\\';
	seq[1] = letter;
	for (k = 0; k < 2; k++) {
		if (cursor < end)
			*cursor = seq[k];
		cursor++;
	}

	*dst = cursor;
	return true;
}
348 
/*
 * Escape a "special" character (backslash, BEL, ESC, double quote) as a
 * backslash followed by '\\', 'a', 'e' or '"' respectively.  Bytes are
 * stored only while the cursor is before @end; the cursor advances by two
 * regardless.  Returns false, leaving *dst alone, for any other character.
 */
static bool escape_special(unsigned char c, char **dst, char *end)
{
	char *cursor = *dst;
	unsigned char letter;

	if (c == '\\')
		letter = '\\';
	else if (c == '\a')
		letter = 'a';
	else if (c == '\e')
		letter = 'e';
	else if (c == '"')
		letter = '"';
	else
		return false;

	if (cursor < end)
		cursor[0] = '\\';
	if (cursor + 1 < end)
		cursor[1] = letter;

	*dst = cursor + 2;
	return true;
}
381 
/*
 * Escape a zero byte as the two characters backslash and '0'.  Bytes are
 * stored only while the cursor is before @end; the cursor advances by two
 * regardless.  Returns false, leaving *dst alone, for a non-zero @c.
 */
static bool escape_null(unsigned char c, char **dst, char *end)
{
	static const char seq[2] = { '\\', '0' };
	char *cursor = *dst;
	int k;

	if (c != '\0')
		return false;

	for (k = 0; k < 2; k++) {
		if (cursor < end)
			*cursor = seq[k];
		cursor++;
	}

	*dst = cursor;
	return true;
}
399 
/*
 * Escape @c as a backslash followed by exactly three octal digits.
 * Bytes are stored only while the cursor is before @end; the cursor
 * advances by four regardless.  Always returns true.
 */
static bool escape_octal(unsigned char c, char **dst, char *end)
{
	char *cursor = *dst;
	int shift;

	if (cursor < end)
		*cursor = '\\';
	cursor++;

	/* most significant digit first: bits 7-6, 5-3, 2-0 */
	for (shift = 6; shift >= 0; shift -= 3) {
		if (cursor < end)
			*cursor = '0' + ((c >> shift) & 0x07);
		cursor++;
	}

	*dst = cursor;
	return true;
}
420 
/*
 * Escape @c as backslash, 'x' and two hexadecimal digits (high nibble
 * first, as produced by hex_asc_hi()/hex_asc_lo()).  Bytes are stored only
 * while the cursor is before @end; the cursor advances by four regardless.
 * Always returns true.
 */
static bool escape_hex(unsigned char c, char **dst, char *end)
{
	const char seq[4] = { '\\', 'x', hex_asc_hi(c), hex_asc_lo(c) };
	char *cursor = *dst;
	int k;

	for (k = 0; k < 4; k++) {
		if (cursor < end)
			*cursor = seq[k];
		cursor++;
	}

	*dst = cursor;
	return true;
}
441 
442 /**
443  * string_escape_mem - quote characters in the given memory buffer
444  * @src:	source buffer (unescaped)
445  * @isz:	source buffer size
446  * @dst:	destination buffer (escaped)
447  * @osz:	destination buffer size
448  * @flags:	combination of the flags
449  * @only:	NULL-terminated string containing characters used to limit
450  *		the selected escape class. If characters are included in @only
451  *		that would not normally be escaped by the classes selected
452  *		in @flags, they will be copied to @dst unescaped.
453  *
454  * Description:
455  * The process of escaping byte buffer includes several parts. They are applied
456  * in the following sequence.
457  *
458  *	1. The character is not matched to the one from @only string and thus
459  *	   must go as-is to the output.
460  *	2. The character is matched to the printable and ASCII classes, if asked,
461  *	   and in case of match it passes through to the output.
462  *	3. The character is matched to the printable or ASCII class, if asked,
463  *	   and in case of match it passes through to the output.
464  *	4. The character is checked if it falls into the class given by @flags.
465  *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
466  *	   character. Note that they actually can't go together, otherwise
467  *	   %ESCAPE_HEX will be ignored.
468  *
469  * Caller must provide valid source and destination pointers. Be aware that
470  * destination buffer will not be NULL-terminated, thus caller have to append
471  * it if needs. The supported flags are::
472  *
473  *	%ESCAPE_SPACE: (special white space, not space itself)
474  *		'\f' - form feed
475  *		'\n' - new line
476  *		'\r' - carriage return
477  *		'\t' - horizontal tab
478  *		'\v' - vertical tab
479  *	%ESCAPE_SPECIAL:
480  *		'\"' - double quote
481  *		'\\' - backslash
482  *		'\a' - alert (BEL)
483  *		'\e' - escape
484  *	%ESCAPE_NULL:
485  *		'\0' - null
486  *	%ESCAPE_OCTAL:
487  *		'\NNN' - byte with octal value NNN (3 digits)
488  *	%ESCAPE_ANY:
489  *		all previous together
490  *	%ESCAPE_NP:
491  *		escape only non-printable characters, checked by isprint()
492  *	%ESCAPE_ANY_NP:
493  *		all previous together
494  *	%ESCAPE_HEX:
495  *		'\xHH' - byte with hexadecimal value HH (2 digits)
496  *	%ESCAPE_NA:
497  *		escape only non-ascii characters, checked by isascii()
498  *	%ESCAPE_NAP:
499  *		escape only non-printable or non-ascii characters
500  *	%ESCAPE_APPEND:
501  *		append characters from @only to be escaped by the given classes
502  *
503  * %ESCAPE_APPEND would help to pass additional characters to the escaped, when
504  * one of %ESCAPE_NP, %ESCAPE_NA, or %ESCAPE_NAP is provided.
505  *
506  * One notable caveat, the %ESCAPE_NAP, %ESCAPE_NP and %ESCAPE_NA have the
507  * higher priority than the rest of the flags (%ESCAPE_NAP is the highest).
508  * It doesn't make much sense to use either of them without %ESCAPE_OCTAL
509  * or %ESCAPE_HEX, because they cover most of the other character classes.
510  * %ESCAPE_NAP can utilize %ESCAPE_SPACE or %ESCAPE_SPECIAL in addition to
511  * the above.
512  *
513  * Return:
514  * The total size of the escaped output that would be generated for
515  * the given input and flags. To check whether the output was
516  * truncated, compare the return value to osz. There is room left in
517  * dst for a '\0' terminator if and only if ret < osz.
518  */
519 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
520 		      unsigned int flags, const char *only)
521 {
522 	char *p = dst;
523 	char *end = p + osz;
524 	bool is_dict = only && *only;
525 	bool is_append = flags & ESCAPE_APPEND;
526 
527 	while (isz--) {
528 		unsigned char c = *src++;
529 		bool in_dict = is_dict && strchr(only, c);
530 
531 		/*
532 		 * Apply rules in the following sequence:
533 		 *	- the @only string is supplied and does not contain a
534 		 *	  character under question
535 		 *	- the character is printable and ASCII, when @flags has
536 		 *	  %ESCAPE_NAP bit set
537 		 *	- the character is printable, when @flags has
538 		 *	  %ESCAPE_NP bit set
539 		 *	- the character is ASCII, when @flags has
540 		 *	  %ESCAPE_NA bit set
541 		 *	- the character doesn't fall into a class of symbols
542 		 *	  defined by given @flags
543 		 * In these cases we just pass through a character to the
544 		 * output buffer.
545 		 *
546 		 * When %ESCAPE_APPEND is passed, the characters from @only
547 		 * have been excluded from the %ESCAPE_NAP, %ESCAPE_NP, and
548 		 * %ESCAPE_NA cases.
549 		 */
550 		if (!(is_append || in_dict) && is_dict &&
551 					  escape_passthrough(c, &p, end))
552 			continue;
553 
554 		if (!(is_append && in_dict) && isascii(c) && isprint(c) &&
555 		    flags & ESCAPE_NAP && escape_passthrough(c, &p, end))
556 			continue;
557 
558 		if (!(is_append && in_dict) && isprint(c) &&
559 		    flags & ESCAPE_NP && escape_passthrough(c, &p, end))
560 			continue;
561 
562 		if (!(is_append && in_dict) && isascii(c) &&
563 		    flags & ESCAPE_NA && escape_passthrough(c, &p, end))
564 			continue;
565 
566 		if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
567 			continue;
568 
569 		if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
570 			continue;
571 
572 		if (flags & ESCAPE_NULL && escape_null(c, &p, end))
573 			continue;
574 
575 		/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
576 		if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
577 			continue;
578 
579 		if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
580 			continue;
581 
582 		escape_passthrough(c, &p, end);
583 	}
584 
585 	return p - dst;
586 }
587 EXPORT_SYMBOL(string_escape_mem);
588 
589 /*
590  * Return an allocated string that has been escaped of special characters
591  * and double quotes, making it safe to log in quotes.
592  */
593 char *kstrdup_quotable(const char *src, gfp_t gfp)
594 {
595 	size_t slen, dlen;
596 	char *dst;
597 	const int flags = ESCAPE_HEX;
598 	const char esc[] = "\f\n\r\t\v\a\e\\\"";
599 
600 	if (!src)
601 		return NULL;
602 	slen = strlen(src);
603 
604 	dlen = string_escape_mem(src, slen, NULL, 0, flags, esc);
605 	dst = kmalloc(dlen + 1, gfp);
606 	if (!dst)
607 		return NULL;
608 
609 	WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen);
610 	dst[dlen] = '\0';
611 
612 	return dst;
613 }
614 EXPORT_SYMBOL_GPL(kstrdup_quotable);
615 
616 /*
617  * Returns allocated NULL-terminated string containing process
618  * command line, with inter-argument NULLs replaced with spaces,
619  * and other special characters escaped.
620  */
621 char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp)
622 {
623 	char *buffer, *quoted;
624 	int i, res;
625 
626 	buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
627 	if (!buffer)
628 		return NULL;
629 
630 	res = get_cmdline(task, buffer, PAGE_SIZE - 1);
631 	buffer[res] = '\0';
632 
633 	/* Collapse trailing NULLs, leave res pointing to last non-NULL. */
634 	while (--res >= 0 && buffer[res] == '\0')
635 		;
636 
637 	/* Replace inter-argument NULLs. */
638 	for (i = 0; i <= res; i++)
639 		if (buffer[i] == '\0')
640 			buffer[i] = ' ';
641 
642 	/* Make sure result is printable. */
643 	quoted = kstrdup_quotable(buffer, gfp);
644 	kfree(buffer);
645 	return quoted;
646 }
647 EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline);
648 
649 /*
650  * Returns allocated NULL-terminated string containing pathname,
651  * with special characters escaped, able to be safely logged. If
652  * there is an error, the leading character will be "<".
653  */
654 char *kstrdup_quotable_file(struct file *file, gfp_t gfp)
655 {
656 	char *temp, *pathname;
657 
658 	if (!file)
659 		return kstrdup("<unknown>", gfp);
660 
661 	/* We add 11 spaces for ' (deleted)' to be appended */
662 	temp = kmalloc(PATH_MAX + 11, GFP_KERNEL);
663 	if (!temp)
664 		return kstrdup("<no_memory>", gfp);
665 
666 	pathname = file_path(file, temp, PATH_MAX + 11);
667 	if (IS_ERR(pathname))
668 		pathname = kstrdup("<too_long>", gfp);
669 	else
670 		pathname = kstrdup_quotable(pathname, gfp);
671 
672 	kfree(temp);
673 	return pathname;
674 }
675 EXPORT_SYMBOL_GPL(kstrdup_quotable_file);
676 
/**
 * kfree_strarray - free a number of dynamically allocated strings contained
 *                  in an array and the array itself
 *
 * @array: Dynamically allocated array of strings to free.
 * @n: Number of strings (starting from the beginning of the array) to free.
 *
 * Passing a non-NULL @array and @n == 0 as well as NULL @array are valid
 * use-cases. If @array is NULL, the function does nothing.
 */
void kfree_strarray(char **array, size_t n)
{
	/*
	 * The index has the same type as @n: a narrower index would wrap
	 * before reaching a count larger than UINT_MAX, so the loop would
	 * never end and would free the first elements again.
	 */
	size_t i;

	if (!array)
		return;

	for (i = 0; i < n; i++)
		kfree(array[i]);
	kfree(array);
}
EXPORT_SYMBOL_GPL(kfree_strarray);
699