xref: /linux/lib/string_helpers.c (revision 83bce9c2baa51e439480a713119a73d3c8b61083)
1 /*
2  * Helpers for formatting and printing strings
3  *
4  * Copyright 31 August 2008 James Bottomley
5  * Copyright (C) 2013, Intel Corporation
6  */
7 #include <linux/bug.h>
8 #include <linux/kernel.h>
9 #include <linux/math64.h>
10 #include <linux/export.h>
11 #include <linux/ctype.h>
12 #include <linux/errno.h>
13 #include <linux/fs.h>
14 #include <linux/limits.h>
15 #include <linux/mm.h>
16 #include <linux/slab.h>
17 #include <linux/string.h>
18 #include <linux/string_helpers.h>
19 
20 /**
21  * string_get_size - get the size in the specified units
22  * @size:	The size to be converted in blocks
23  * @blk_size:	Size of the block (use 1 for size in bytes)
24  * @units:	units to use (powers of 1000 or 1024)
25  * @buf:	buffer to format to
26  * @len:	length of buffer
27  *
28  * This function returns a string formatted to 3 significant figures
29  * giving the size in the required units.  @buf should have room for
30  * at least 9 bytes and will always be zero terminated.
31  *
32  */
33 void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
34 		     char *buf, int len)
35 {
36 	static const char *const units_10[] = {
37 		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
38 	};
39 	static const char *const units_2[] = {
40 		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
41 	};
42 	static const char *const *const units_str[] = {
43 		[STRING_UNITS_10] = units_10,
44 		[STRING_UNITS_2] = units_2,
45 	};
46 	static const unsigned int divisor[] = {
47 		[STRING_UNITS_10] = 1000,
48 		[STRING_UNITS_2] = 1024,
49 	};
50 	static const unsigned int rounding[] = { 500, 50, 5 };
51 	int i = 0, j;
52 	u32 remainder = 0, sf_cap;
53 	char tmp[8];
54 	const char *unit;
55 
56 	tmp[0] = '\0';
57 
58 	if (blk_size == 0)
59 		size = 0;
60 	if (size == 0)
61 		goto out;
62 
63 	/* This is Napier's algorithm.  Reduce the original block size to
64 	 *
65 	 * coefficient * divisor[units]^i
66 	 *
67 	 * we do the reduction so both coefficients are just under 32 bits so
68 	 * that multiplying them together won't overflow 64 bits and we keep
69 	 * as much precision as possible in the numbers.
70 	 *
71 	 * Note: it's safe to throw away the remainders here because all the
72 	 * precision is in the coefficients.
73 	 */
74 	while (blk_size >> 32) {
75 		do_div(blk_size, divisor[units]);
76 		i++;
77 	}
78 
79 	while (size >> 32) {
80 		do_div(size, divisor[units]);
81 		i++;
82 	}
83 
84 	/* now perform the actual multiplication keeping i as the sum of the
85 	 * two logarithms */
86 	size *= blk_size;
87 
88 	/* and logarithmically reduce it until it's just under the divisor */
89 	while (size >= divisor[units]) {
90 		remainder = do_div(size, divisor[units]);
91 		i++;
92 	}
93 
94 	/* work out in j how many digits of precision we need from the
95 	 * remainder */
96 	sf_cap = size;
97 	for (j = 0; sf_cap*10 < 1000; j++)
98 		sf_cap *= 10;
99 
100 	if (units == STRING_UNITS_2) {
101 		/* express the remainder as a decimal.  It's currently the
102 		 * numerator of a fraction whose denominator is
103 		 * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
104 		remainder *= 1000;
105 		remainder >>= 10;
106 	}
107 
108 	/* add a 5 to the digit below what will be printed to ensure
109 	 * an arithmetical round up and carry it through to size */
110 	remainder += rounding[j];
111 	if (remainder >= 1000) {
112 		remainder -= 1000;
113 		size += 1;
114 	}
115 
116 	if (j) {
117 		snprintf(tmp, sizeof(tmp), ".%03u", remainder);
118 		tmp[j+1] = '\0';
119 	}
120 
121  out:
122 	if (i >= ARRAY_SIZE(units_2))
123 		unit = "UNK";
124 	else
125 		unit = units_str[units][i];
126 
127 	snprintf(buf, len, "%u%s %s", (u32)size,
128 		 tmp, unit);
129 }
130 EXPORT_SYMBOL(string_get_size);
131 
/*
 * Decode a white-space escape.  *src points at the character that follows
 * the backslash.  On a match the decoded byte is stored at *dst, both
 * cursors advance by one and true is returned; otherwise nothing is touched
 * and false is returned.
 */
static bool unescape_space(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case 'n':
		decoded = '\n';
		break;
	case 'r':
		decoded = '\r';
		break;
	case 't':
		decoded = '\t';
		break;
	case 'v':
		decoded = '\v';
		break;
	case 'f':
		decoded = '\f';
		break;
	default:
		return false;
	}

	**dst = decoded;
	(*dst)++;
	(*src)++;
	return true;
}
159 
160 static bool unescape_octal(char **src, char **dst)
161 {
162 	char *p = *dst, *q = *src;
163 	u8 num;
164 
165 	if (isodigit(*q) == 0)
166 		return false;
167 
168 	num = (*q++) & 7;
169 	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
170 		num <<= 3;
171 		num += (*q++) & 7;
172 	}
173 	*p = num;
174 	*dst += 1;
175 	*src = q;
176 	return true;
177 }
178 
179 static bool unescape_hex(char **src, char **dst)
180 {
181 	char *p = *dst, *q = *src;
182 	int digit;
183 	u8 num;
184 
185 	if (*q++ != 'x')
186 		return false;
187 
188 	num = digit = hex_to_bin(*q++);
189 	if (digit < 0)
190 		return false;
191 
192 	digit = hex_to_bin(*q);
193 	if (digit >= 0) {
194 		q++;
195 		num = (num << 4) | digit;
196 	}
197 	*p = num;
198 	*dst += 1;
199 	*src = q;
200 	return true;
201 }
202 
/*
 * Decode one of the "special" escapes: double quote, backslash, alert and
 * escape.  *src points at the character that follows the backslash.  On a
 * match the decoded byte is stored at *dst and both cursors advance by one;
 * otherwise nothing is touched and false is returned.
 */
static bool unescape_special(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case '\"':
		decoded = '\"';
		break;
	case '\\':
		decoded = '\\';
		break;
	case 'a':
		decoded = '\a';
		break;
	case 'e':
		decoded = '\e';
		break;
	default:
		return false;
	}

	**dst = decoded;
	(*dst)++;
	(*src)++;
	return true;
}
227 
228 /**
229  * string_unescape - unquote characters in the given string
230  * @src:	source buffer (escaped)
231  * @dst:	destination buffer (unescaped)
232  * @size:	size of the destination buffer (0 to unlimit)
233  * @flags:	combination of the flags (bitwise OR):
234  *	%UNESCAPE_SPACE:
235  *		'\f' - form feed
236  *		'\n' - new line
237  *		'\r' - carriage return
238  *		'\t' - horizontal tab
239  *		'\v' - vertical tab
240  *	%UNESCAPE_OCTAL:
241  *		'\NNN' - byte with octal value NNN (1 to 3 digits)
242  *	%UNESCAPE_HEX:
243  *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
244  *	%UNESCAPE_SPECIAL:
245  *		'\"' - double quote
246  *		'\\' - backslash
247  *		'\a' - alert (BEL)
248  *		'\e' - escape
249  *	%UNESCAPE_ANY:
250  *		all previous together
251  *
252  * Description:
253  * The function unquotes characters in the given string.
254  *
255  * Because the size of the output will be the same as or less than the size of
256  * the input, the transformation may be performed in place.
257  *
258  * Caller must provide valid source and destination pointers. Be aware that
259  * destination buffer will always be NULL-terminated. Source string must be
260  * NULL-terminated as well.
261  *
262  * Return:
263  * The amount of the characters processed to the destination buffer excluding
264  * trailing '\0' is returned.
265  */
266 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
267 {
268 	char *out = dst;
269 
270 	while (*src && --size) {
271 		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
272 			src++;
273 			size--;
274 
275 			if (flags & UNESCAPE_SPACE &&
276 					unescape_space(&src, &out))
277 				continue;
278 
279 			if (flags & UNESCAPE_OCTAL &&
280 					unescape_octal(&src, &out))
281 				continue;
282 
283 			if (flags & UNESCAPE_HEX &&
284 					unescape_hex(&src, &out))
285 				continue;
286 
287 			if (flags & UNESCAPE_SPECIAL &&
288 					unescape_special(&src, &out))
289 				continue;
290 
291 			*out++ = '\\';
292 		}
293 		*out++ = *src++;
294 	}
295 	*out = '\0';
296 
297 	return out - dst;
298 }
299 EXPORT_SYMBOL(string_unescape);
300 
/*
 * Copy @c to the output unchanged.  The byte is stored only if the cursor
 * is still below @end, but the cursor always advances so that the caller
 * can compute the length the full output would have had.  Always succeeds.
 */
static bool escape_passthrough(unsigned char c, char **dst, char *end)
{
	if (*dst < end)
		**dst = c;
	(*dst)++;
	return true;
}
310 
/*
 * Escape a white-space control character (newline, carriage return, tab,
 * vertical tab, form feed) as a backslash followed by its mnemonic letter.
 * Returns false without touching *dst for any other character.  Bytes that
 * would land at or beyond @end are not stored, yet the cursor still moves
 * past them.
 */
static bool escape_space(unsigned char c, char **dst, char *end)
{
	char seq[2] = { '\\', '\0' };
	char *out = *dst;
	int k;

	switch (c) {
	case '\n':
		seq[1] = 'n';
		break;
	case '\r':
		seq[1] = 'r';
		break;
	case '\t':
		seq[1] = 't';
		break;
	case '\v':
		seq[1] = 'v';
		break;
	case '\f':
		seq[1] = 'f';
		break;
	default:
		return false;
	}

	for (k = 0; k < 2; k++, out++) {
		if (out < end)
			*out = seq[k];
	}

	*dst = out;
	return true;
}
346 
/*
 * Escape a backslash, alert (BEL) or escape character as a backslash
 * followed by '\\', 'a' or 'e' respectively.  Returns false without
 * touching *dst for any other character.  Bytes that would land at or
 * beyond @end are not stored, yet the cursor still moves past them.
 */
static bool escape_special(unsigned char c, char **dst, char *end)
{
	char seq[2] = { '\\', '\0' };
	char *out = *dst;
	int k;

	switch (c) {
	case '\\':
		seq[1] = '\\';
		break;
	case '\a':
		seq[1] = 'a';
		break;
	case '\e':
		seq[1] = 'e';
		break;
	default:
		return false;
	}

	for (k = 0; k < 2; k++, out++) {
		if (out < end)
			*out = seq[k];
	}

	*dst = out;
	return true;
}
376 
/*
 * Escape a NUL byte as the two characters '\' and '0'.  Returns false
 * without touching *dst for any non-zero character.  Bytes that would land
 * at or beyond @end are not stored, yet the cursor still moves past them.
 */
static bool escape_null(unsigned char c, char **dst, char *end)
{
	static const char seq[2] = { '\\', '0' };
	char *out = *dst;
	int k;

	if (c != 0)
		return false;

	for (k = 0; k < 2; k++, out++) {
		if (out < end)
			*out = seq[k];
	}

	*dst = out;
	return true;
}
394 
/*
 * Escape @c as a backslash followed by exactly three octal digits, most
 * significant first.  Accepts every byte value, so it always returns true.
 * Bytes that would land at or beyond @end are not stored, yet the cursor
 * still moves past them.
 */
static bool escape_octal(unsigned char c, char **dst, char *end)
{
	char *out = *dst;
	int shift;

	if (out < end)
		*out = '\\';
	++out;

	/* three bits per octal digit: shifts of 6, 3 and 0 */
	for (shift = 6; shift >= 0; shift -= 3, ++out) {
		if (out < end)
			*out = ((c >> shift) & 0x07) + '0';
	}

	*dst = out;
	return true;
}
415 
/*
 * Escape @c as a backslash, an 'x' and exactly two hexadecimal digits,
 * high nibble first.  Accepts every byte value, so it always returns true.
 * Bytes that would land at or beyond @end are not stored, yet the cursor
 * still moves past them.
 */
static bool escape_hex(unsigned char c, char **dst, char *end)
{
	const char seq[4] = { '\\', 'x', hex_asc_hi(c), hex_asc_lo(c) };
	char *out = *dst;
	int k;

	for (k = 0; k < 4; k++, out++) {
		if (out < end)
			*out = seq[k];
	}

	*dst = out;
	return true;
}
436 
437 /**
438  * string_escape_mem - quote characters in the given memory buffer
439  * @src:	source buffer (unescaped)
440  * @isz:	source buffer size
441  * @dst:	destination buffer (escaped)
442  * @osz:	destination buffer size
443  * @flags:	combination of the flags (bitwise OR):
444  *	%ESCAPE_SPACE: (special white space, not space itself)
445  *		'\f' - form feed
446  *		'\n' - new line
447  *		'\r' - carriage return
448  *		'\t' - horizontal tab
449  *		'\v' - vertical tab
450  *	%ESCAPE_SPECIAL:
451  *		'\\' - backslash
452  *		'\a' - alert (BEL)
453  *		'\e' - escape
454  *	%ESCAPE_NULL:
455  *		'\0' - null
456  *	%ESCAPE_OCTAL:
457  *		'\NNN' - byte with octal value NNN (3 digits)
458  *	%ESCAPE_ANY:
459  *		all previous together
460  *	%ESCAPE_NP:
461  *		escape only non-printable characters (checked by isprint)
462  *	%ESCAPE_ANY_NP:
463  *		all previous together
464  *	%ESCAPE_HEX:
465  *		'\xHH' - byte with hexadecimal value HH (2 digits)
466  * @only:	NULL-terminated string containing characters used to limit
467  *		the selected escape class. If characters are included in @only
468  *		that would not normally be escaped by the classes selected
469  *		in @flags, they will be copied to @dst unescaped.
470  *
471  * Description:
472  * The process of escaping byte buffer includes several parts. They are applied
473  * in the following sequence.
474  *	1. The character is matched to the printable class, if asked, and in
475  *	   case of match it passes through to the output.
476  *	2. The character is not matched to the one from @only string and thus
477  *	   must go as-is to the output.
478  *	3. The character is checked if it falls into the class given by @flags.
479  *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
480  *	   character. Note that they actually can't go together, otherwise
481  *	   %ESCAPE_HEX will be ignored.
482  *
483  * Caller must provide valid source and destination pointers. Be aware that
484  * destination buffer will not be NULL-terminated, thus caller have to append
485  * it if needs.
486  *
487  * Return:
488  * The total size of the escaped output that would be generated for
489  * the given input and flags. To check whether the output was
490  * truncated, compare the return value to osz. There is room left in
491  * dst for a '\0' terminator if and only if ret < osz.
492  */
493 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
494 		      unsigned int flags, const char *only)
495 {
496 	char *p = dst;
497 	char *end = p + osz;
498 	bool is_dict = only && *only;
499 
500 	while (isz--) {
501 		unsigned char c = *src++;
502 
503 		/*
504 		 * Apply rules in the following sequence:
505 		 *	- the character is printable, when @flags has
506 		 *	  %ESCAPE_NP bit set
507 		 *	- the @only string is supplied and does not contain a
508 		 *	  character under question
509 		 *	- the character doesn't fall into a class of symbols
510 		 *	  defined by given @flags
511 		 * In these cases we just pass through a character to the
512 		 * output buffer.
513 		 */
514 		if ((flags & ESCAPE_NP && isprint(c)) ||
515 		    (is_dict && !strchr(only, c))) {
516 			/* do nothing */
517 		} else {
518 			if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
519 				continue;
520 
521 			if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
522 				continue;
523 
524 			if (flags & ESCAPE_NULL && escape_null(c, &p, end))
525 				continue;
526 
527 			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
528 			if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
529 				continue;
530 
531 			if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
532 				continue;
533 		}
534 
535 		escape_passthrough(c, &p, end);
536 	}
537 
538 	return p - dst;
539 }
540 EXPORT_SYMBOL(string_escape_mem);
541 
542 /*
543  * Return an allocated string that has been escaped of special characters
544  * and double quotes, making it safe to log in quotes.
545  */
546 char *kstrdup_quotable(const char *src, gfp_t gfp)
547 {
548 	size_t slen, dlen;
549 	char *dst;
550 	const int flags = ESCAPE_HEX;
551 	const char esc[] = "\f\n\r\t\v\a\e\\\"";
552 
553 	if (!src)
554 		return NULL;
555 	slen = strlen(src);
556 
557 	dlen = string_escape_mem(src, slen, NULL, 0, flags, esc);
558 	dst = kmalloc(dlen + 1, gfp);
559 	if (!dst)
560 		return NULL;
561 
562 	WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen);
563 	dst[dlen] = '\0';
564 
565 	return dst;
566 }
567 EXPORT_SYMBOL_GPL(kstrdup_quotable);
568 
569 /*
570  * Returns allocated NULL-terminated string containing process
571  * command line, with inter-argument NULLs replaced with spaces,
572  * and other special characters escaped.
573  */
574 char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp)
575 {
576 	char *buffer, *quoted;
577 	int i, res;
578 
579 	buffer = kmalloc(PAGE_SIZE, GFP_TEMPORARY);
580 	if (!buffer)
581 		return NULL;
582 
583 	res = get_cmdline(task, buffer, PAGE_SIZE - 1);
584 	buffer[res] = '\0';
585 
586 	/* Collapse trailing NULLs, leave res pointing to last non-NULL. */
587 	while (--res >= 0 && buffer[res] == '\0')
588 		;
589 
590 	/* Replace inter-argument NULLs. */
591 	for (i = 0; i <= res; i++)
592 		if (buffer[i] == '\0')
593 			buffer[i] = ' ';
594 
595 	/* Make sure result is printable. */
596 	quoted = kstrdup_quotable(buffer, gfp);
597 	kfree(buffer);
598 	return quoted;
599 }
600 EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline);
601 
602 /*
603  * Returns allocated NULL-terminated string containing pathname,
604  * with special characters escaped, able to be safely logged. If
605  * there is an error, the leading character will be "<".
606  */
607 char *kstrdup_quotable_file(struct file *file, gfp_t gfp)
608 {
609 	char *temp, *pathname;
610 
611 	if (!file)
612 		return kstrdup("<unknown>", gfp);
613 
614 	/* We add 11 spaces for ' (deleted)' to be appended */
615 	temp = kmalloc(PATH_MAX + 11, GFP_TEMPORARY);
616 	if (!temp)
617 		return kstrdup("<no_memory>", gfp);
618 
619 	pathname = file_path(file, temp, PATH_MAX + 11);
620 	if (IS_ERR(pathname))
621 		pathname = kstrdup("<too_long>", gfp);
622 	else
623 		pathname = kstrdup_quotable(pathname, gfp);
624 
625 	kfree(temp);
626 	return pathname;
627 }
628 EXPORT_SYMBOL_GPL(kstrdup_quotable_file);
629