xref: /linux/lib/string_helpers.c (revision 00452ba9fdb5bf6fb5fea1dae5227b4bbed44fc4)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Helpers for formatting and printing strings
4   *
5   * Copyright 31 August 2008 James Bottomley
6   * Copyright (C) 2013, Intel Corporation
7   */
8  #include <linux/bug.h>
9  #include <linux/kernel.h>
10  #include <linux/math64.h>
11  #include <linux/export.h>
12  #include <linux/ctype.h>
13  #include <linux/errno.h>
14  #include <linux/fs.h>
15  #include <linux/limits.h>
16  #include <linux/mm.h>
17  #include <linux/slab.h>
18  #include <linux/string.h>
19  #include <linux/string_helpers.h>
20  
21  /**
22   * string_get_size - get the size in the specified units
23   * @size:	The size to be converted in blocks
24   * @blk_size:	Size of the block (use 1 for size in bytes)
25   * @units:	units to use (powers of 1000 or 1024)
26   * @buf:	buffer to format to
27   * @len:	length of buffer
28   *
29   * This function returns a string formatted to 3 significant figures
30   * giving the size in the required units.  @buf should have room for
31   * at least 9 bytes and will always be zero terminated.
32   *
33   */
34  void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
35  		     char *buf, int len)
36  {
37  	static const char *const units_10[] = {
38  		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
39  	};
40  	static const char *const units_2[] = {
41  		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
42  	};
43  	static const char *const *const units_str[] = {
44  		[STRING_UNITS_10] = units_10,
45  		[STRING_UNITS_2] = units_2,
46  	};
47  	static const unsigned int divisor[] = {
48  		[STRING_UNITS_10] = 1000,
49  		[STRING_UNITS_2] = 1024,
50  	};
51  	static const unsigned int rounding[] = { 500, 50, 5 };
52  	int i = 0, j;
53  	u32 remainder = 0, sf_cap;
54  	char tmp[8];
55  	const char *unit;
56  
57  	tmp[0] = '\0';
58  
59  	if (blk_size == 0)
60  		size = 0;
61  	if (size == 0)
62  		goto out;
63  
64  	/* This is Napier's algorithm.  Reduce the original block size to
65  	 *
66  	 * coefficient * divisor[units]^i
67  	 *
68  	 * we do the reduction so both coefficients are just under 32 bits so
69  	 * that multiplying them together won't overflow 64 bits and we keep
70  	 * as much precision as possible in the numbers.
71  	 *
72  	 * Note: it's safe to throw away the remainders here because all the
73  	 * precision is in the coefficients.
74  	 */
75  	while (blk_size >> 32) {
76  		do_div(blk_size, divisor[units]);
77  		i++;
78  	}
79  
80  	while (size >> 32) {
81  		do_div(size, divisor[units]);
82  		i++;
83  	}
84  
85  	/* now perform the actual multiplication keeping i as the sum of the
86  	 * two logarithms */
87  	size *= blk_size;
88  
89  	/* and logarithmically reduce it until it's just under the divisor */
90  	while (size >= divisor[units]) {
91  		remainder = do_div(size, divisor[units]);
92  		i++;
93  	}
94  
95  	/* work out in j how many digits of precision we need from the
96  	 * remainder */
97  	sf_cap = size;
98  	for (j = 0; sf_cap*10 < 1000; j++)
99  		sf_cap *= 10;
100  
101  	if (units == STRING_UNITS_2) {
102  		/* express the remainder as a decimal.  It's currently the
103  		 * numerator of a fraction whose denominator is
104  		 * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
105  		remainder *= 1000;
106  		remainder >>= 10;
107  	}
108  
109  	/* add a 5 to the digit below what will be printed to ensure
110  	 * an arithmetical round up and carry it through to size */
111  	remainder += rounding[j];
112  	if (remainder >= 1000) {
113  		remainder -= 1000;
114  		size += 1;
115  	}
116  
117  	if (j) {
118  		snprintf(tmp, sizeof(tmp), ".%03u", remainder);
119  		tmp[j+1] = '\0';
120  	}
121  
122   out:
123  	if (i >= ARRAY_SIZE(units_2))
124  		unit = "UNK";
125  	else
126  		unit = units_str[units][i];
127  
128  	snprintf(buf, len, "%u%s %s", (u32)size,
129  		 tmp, unit);
130  }
131  EXPORT_SYMBOL(string_get_size);
132  
/*
 * Decode a white-space escape.  *src points at the character following the
 * backslash.  For one of n, r, t, v or f the matching control character is
 * stored at *dst, both cursors advance by one and true is returned.
 * Otherwise nothing is touched and false is returned.
 */
static bool unescape_space(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case 'f':
		decoded = '\f';
		break;
	case 'n':
		decoded = '\n';
		break;
	case 'r':
		decoded = '\r';
		break;
	case 't':
		decoded = '\t';
		break;
	case 'v':
		decoded = '\v';
		break;
	default:
		return false;
	}

	**dst = decoded;
	++*dst;
	++*src;
	return true;
}
160  
161  static bool unescape_octal(char **src, char **dst)
162  {
163  	char *p = *dst, *q = *src;
164  	u8 num;
165  
166  	if (isodigit(*q) == 0)
167  		return false;
168  
169  	num = (*q++) & 7;
170  	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
171  		num <<= 3;
172  		num += (*q++) & 7;
173  	}
174  	*p = num;
175  	*dst += 1;
176  	*src = q;
177  	return true;
178  }
179  
180  static bool unescape_hex(char **src, char **dst)
181  {
182  	char *p = *dst, *q = *src;
183  	int digit;
184  	u8 num;
185  
186  	if (*q++ != 'x')
187  		return false;
188  
189  	num = digit = hex_to_bin(*q++);
190  	if (digit < 0)
191  		return false;
192  
193  	digit = hex_to_bin(*q);
194  	if (digit >= 0) {
195  		q++;
196  		num = (num << 4) | digit;
197  	}
198  	*p = num;
199  	*dst += 1;
200  	*src = q;
201  	return true;
202  }
203  
/*
 * Decode one of the "special" escapes.  *src points at the character
 * following the backslash.  A double quote and a backslash stand for
 * themselves, 'a' is the alert (BEL) character and 'e' is the escape
 * character.  On a match the result is stored at *dst, both cursors advance
 * by one and true is returned.  Otherwise nothing is touched and false is
 * returned.
 */
static bool unescape_special(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case '\\':
	case '\"':
		/* these two map onto themselves */
		decoded = **src;
		break;
	case 'a':
		decoded = '\a';
		break;
	case 'e':
		decoded = '\e';
		break;
	default:
		return false;
	}

	**dst = decoded;
	++*dst;
	++*src;
	return true;
}
228  
229  /**
230   * string_unescape - unquote characters in the given string
231   * @src:	source buffer (escaped)
232   * @dst:	destination buffer (unescaped)
233   * @size:	size of the destination buffer (0 to unlimit)
234   * @flags:	combination of the flags.
235   *
236   * Description:
237   * The function unquotes characters in the given string.
238   *
239   * Because the size of the output will be the same as or less than the size of
240   * the input, the transformation may be performed in place.
241   *
242   * Caller must provide valid source and destination pointers. Be aware that
243   * destination buffer will always be NULL-terminated. Source string must be
244   * NULL-terminated as well.  The supported flags are::
245   *
246   *	UNESCAPE_SPACE:
247   *		'\f' - form feed
248   *		'\n' - new line
249   *		'\r' - carriage return
250   *		'\t' - horizontal tab
251   *		'\v' - vertical tab
252   *	UNESCAPE_OCTAL:
253   *		'\NNN' - byte with octal value NNN (1 to 3 digits)
254   *	UNESCAPE_HEX:
255   *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
256   *	UNESCAPE_SPECIAL:
257   *		'\"' - double quote
258   *		'\\' - backslash
259   *		'\a' - alert (BEL)
260   *		'\e' - escape
261   *	UNESCAPE_ANY:
262   *		all previous together
263   *
264   * Return:
265   * The amount of the characters processed to the destination buffer excluding
266   * trailing '\0' is returned.
267   */
268  int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
269  {
270  	char *out = dst;
271  
272  	while (*src && --size) {
273  		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
274  			src++;
275  			size--;
276  
277  			if (flags & UNESCAPE_SPACE &&
278  					unescape_space(&src, &out))
279  				continue;
280  
281  			if (flags & UNESCAPE_OCTAL &&
282  					unescape_octal(&src, &out))
283  				continue;
284  
285  			if (flags & UNESCAPE_HEX &&
286  					unescape_hex(&src, &out))
287  				continue;
288  
289  			if (flags & UNESCAPE_SPECIAL &&
290  					unescape_special(&src, &out))
291  				continue;
292  
293  			*out++ = '\\';
294  		}
295  		*out++ = *src++;
296  	}
297  	*out = '\0';
298  
299  	return out - dst;
300  }
301  EXPORT_SYMBOL(string_unescape);
302  
/*
 * Emit @c unchanged.  The byte is only stored when the cursor is still in
 * front of @end, but the cursor always advances so that the caller can work
 * out the untruncated output length.  Always returns true.
 */
static bool escape_passthrough(unsigned char c, char **dst, char *end)
{
	if (*dst < end)
		**dst = c;
	++*dst;
	return true;
}
312  
/*
 * Escape a white-space control character (\n, \r, \t, \v or \f) as a
 * backslash followed by its letter.  Returns false without touching *dst
 * for any other character.  Bytes that would land at or beyond @end are
 * not stored, but *dst always advances by two on a match.
 */
static bool escape_space(unsigned char c, char **dst, char *end)
{
	char *wr = *dst;
	unsigned char letter;

	if (c == '\n')
		letter = 'n';
	else if (c == '\r')
		letter = 'r';
	else if (c == '\t')
		letter = 't';
	else if (c == '\v')
		letter = 'v';
	else if (c == '\f')
		letter = 'f';
	else
		return false;

	if (wr < end)
		*wr = '\\';
	wr++;
	if (wr < end)
		*wr = letter;
	wr++;

	*dst = wr;
	return true;
}
348  
/*
 * Escape a backslash, alert (BEL) or escape character as a backslash
 * followed by '\\', 'a' or 'e' respectively.  Returns false without
 * touching *dst for any other character.  Bytes that would land at or
 * beyond @end are not stored, but *dst always advances by two on a match.
 */
static bool escape_special(unsigned char c, char **dst, char *end)
{
	char *wr = *dst;
	unsigned char letter;

	if (c == '\\')
		letter = '\\';
	else if (c == '\a')
		letter = 'a';
	else if (c == '\e')
		letter = 'e';
	else
		return false;

	if (wr < end)
		*wr = '\\';
	wr++;
	if (wr < end)
		*wr = letter;
	wr++;

	*dst = wr;
	return true;
}
378  
/*
 * Escape a NUL byte as the two characters "\0".  Returns false without
 * touching *dst for any non-zero @c.  Bytes that would land at or beyond
 * @end are not stored, but *dst always advances by two on a match.
 */
static bool escape_null(unsigned char c, char **dst, char *end)
{
	char *wr = *dst;

	if (c != '\0')
		return false;

	if (wr < end)
		*wr = '\\';
	wr++;
	if (wr < end)
		*wr = '0';
	wr++;

	*dst = wr;
	return true;
}
396  
/*
 * Escape @c as a backslash followed by exactly three octal digits.  Bytes
 * that would land at or beyond @end are not stored, but *dst always
 * advances by four.  Always returns true.
 */
static bool escape_octal(unsigned char c, char **dst, char *end)
{
	char *wr = *dst;
	int shift;

	if (wr < end)
		*wr = '\\';
	wr++;

	/* most significant digit first: bits 7-6, then 5-3, then 2-0 */
	for (shift = 6; shift >= 0; shift -= 3) {
		if (wr < end)
			*wr = ((c >> shift) & 0x07) + '0';
		wr++;
	}

	*dst = wr;
	return true;
}
417  
/*
 * Escape @c as a backslash, an 'x' and exactly two hexadecimal digits.
 * Bytes that would land at or beyond @end are not stored, but *dst always
 * advances by four.  Always returns true.
 */
static bool escape_hex(unsigned char c, char **dst, char *end)
{
	const char seq[4] = { '\\', 'x', hex_asc_hi(c), hex_asc_lo(c) };
	char *wr = *dst;
	int k;

	for (k = 0; k < 4; k++) {
		if (wr < end)
			*wr = seq[k];
		wr++;
	}

	*dst = wr;
	return true;
}
438  
439  /**
440   * string_escape_mem - quote characters in the given memory buffer
441   * @src:	source buffer (unescaped)
442   * @isz:	source buffer size
443   * @dst:	destination buffer (escaped)
444   * @osz:	destination buffer size
445   * @flags:	combination of the flags
446   * @only:	NULL-terminated string containing characters used to limit
447   *		the selected escape class. If characters are included in @only
448   *		that would not normally be escaped by the classes selected
449   *		in @flags, they will be copied to @dst unescaped.
450   *
451   * Description:
452   * The process of escaping byte buffer includes several parts. They are applied
453   * in the following sequence.
454   *
455   *	1. The character is matched to the printable class, if asked, and in
456   *	   case of match it passes through to the output.
457   *	2. The character is not matched to the one from @only string and thus
458   *	   must go as-is to the output.
459   *	3. The character is checked if it falls into the class given by @flags.
460   *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
461   *	   character. Note that they actually can't go together, otherwise
462   *	   %ESCAPE_HEX will be ignored.
463   *
464   * Caller must provide valid source and destination pointers. Be aware that
465   * destination buffer will not be NULL-terminated, thus caller have to append
466   * it if needs.   The supported flags are::
467   *
468   *	%ESCAPE_SPACE: (special white space, not space itself)
469   *		'\f' - form feed
470   *		'\n' - new line
471   *		'\r' - carriage return
472   *		'\t' - horizontal tab
473   *		'\v' - vertical tab
474   *	%ESCAPE_SPECIAL:
475   *		'\\' - backslash
476   *		'\a' - alert (BEL)
477   *		'\e' - escape
478   *	%ESCAPE_NULL:
479   *		'\0' - null
480   *	%ESCAPE_OCTAL:
481   *		'\NNN' - byte with octal value NNN (3 digits)
482   *	%ESCAPE_ANY:
483   *		all previous together
484   *	%ESCAPE_NP:
485   *		escape only non-printable characters (checked by isprint)
486   *	%ESCAPE_ANY_NP:
487   *		all previous together
488   *	%ESCAPE_HEX:
489   *		'\xHH' - byte with hexadecimal value HH (2 digits)
490   *
491   * Return:
492   * The total size of the escaped output that would be generated for
493   * the given input and flags. To check whether the output was
494   * truncated, compare the return value to osz. There is room left in
495   * dst for a '\0' terminator if and only if ret < osz.
496   */
497  int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
498  		      unsigned int flags, const char *only)
499  {
500  	char *p = dst;
501  	char *end = p + osz;
502  	bool is_dict = only && *only;
503  
504  	while (isz--) {
505  		unsigned char c = *src++;
506  
507  		/*
508  		 * Apply rules in the following sequence:
509  		 *	- the character is printable, when @flags has
510  		 *	  %ESCAPE_NP bit set
511  		 *	- the @only string is supplied and does not contain a
512  		 *	  character under question
513  		 *	- the character doesn't fall into a class of symbols
514  		 *	  defined by given @flags
515  		 * In these cases we just pass through a character to the
516  		 * output buffer.
517  		 */
518  		if ((flags & ESCAPE_NP && isprint(c)) ||
519  		    (is_dict && !strchr(only, c))) {
520  			/* do nothing */
521  		} else {
522  			if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
523  				continue;
524  
525  			if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
526  				continue;
527  
528  			if (flags & ESCAPE_NULL && escape_null(c, &p, end))
529  				continue;
530  
531  			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
532  			if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
533  				continue;
534  
535  			if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
536  				continue;
537  		}
538  
539  		escape_passthrough(c, &p, end);
540  	}
541  
542  	return p - dst;
543  }
544  EXPORT_SYMBOL(string_escape_mem);
545  
/*
 * Escape @isz bytes from @src into @dst so that only printable ASCII other
 * than the double quote and the backslash is copied unchanged; every other
 * byte is written as \xHH.  At most @osz bytes are stored and no terminator
 * is appended.  Returns the size the complete output would have, which may
 * exceed @osz.
 */
int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
					size_t osz)
{
	char *wr = dst;
	char *limit = dst + osz;
	size_t pos;

	for (pos = 0; pos < isz; pos++) {
		unsigned char c = src[pos];
		bool plain = isprint(c) && isascii(c) &&
			     c != '"' && c != '\\';

		if (plain)
			escape_passthrough(c, &wr, limit);
		else
			escape_hex(c, &wr, limit);
	}

	return wr - dst;
}
EXPORT_SYMBOL(string_escape_mem_ascii);
564  
565  /*
566   * Return an allocated string that has been escaped of special characters
567   * and double quotes, making it safe to log in quotes.
568   */
569  char *kstrdup_quotable(const char *src, gfp_t gfp)
570  {
571  	size_t slen, dlen;
572  	char *dst;
573  	const int flags = ESCAPE_HEX;
574  	const char esc[] = "\f\n\r\t\v\a\e\\\"";
575  
576  	if (!src)
577  		return NULL;
578  	slen = strlen(src);
579  
580  	dlen = string_escape_mem(src, slen, NULL, 0, flags, esc);
581  	dst = kmalloc(dlen + 1, gfp);
582  	if (!dst)
583  		return NULL;
584  
585  	WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen);
586  	dst[dlen] = '\0';
587  
588  	return dst;
589  }
590  EXPORT_SYMBOL_GPL(kstrdup_quotable);
591  
592  /*
593   * Returns allocated NULL-terminated string containing process
594   * command line, with inter-argument NULLs replaced with spaces,
595   * and other special characters escaped.
596   */
597  char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp)
598  {
599  	char *buffer, *quoted;
600  	int i, res;
601  
602  	buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
603  	if (!buffer)
604  		return NULL;
605  
606  	res = get_cmdline(task, buffer, PAGE_SIZE - 1);
607  	buffer[res] = '\0';
608  
609  	/* Collapse trailing NULLs, leave res pointing to last non-NULL. */
610  	while (--res >= 0 && buffer[res] == '\0')
611  		;
612  
613  	/* Replace inter-argument NULLs. */
614  	for (i = 0; i <= res; i++)
615  		if (buffer[i] == '\0')
616  			buffer[i] = ' ';
617  
618  	/* Make sure result is printable. */
619  	quoted = kstrdup_quotable(buffer, gfp);
620  	kfree(buffer);
621  	return quoted;
622  }
623  EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline);
624  
625  /*
626   * Returns allocated NULL-terminated string containing pathname,
627   * with special characters escaped, able to be safely logged. If
628   * there is an error, the leading character will be "<".
629   */
630  char *kstrdup_quotable_file(struct file *file, gfp_t gfp)
631  {
632  	char *temp, *pathname;
633  
634  	if (!file)
635  		return kstrdup("<unknown>", gfp);
636  
637  	/* We add 11 spaces for ' (deleted)' to be appended */
638  	temp = kmalloc(PATH_MAX + 11, GFP_KERNEL);
639  	if (!temp)
640  		return kstrdup("<no_memory>", gfp);
641  
642  	pathname = file_path(file, temp, PATH_MAX + 11);
643  	if (IS_ERR(pathname))
644  		pathname = kstrdup("<too_long>", gfp);
645  	else
646  		pathname = kstrdup_quotable(pathname, gfp);
647  
648  	kfree(temp);
649  	return pathname;
650  }
651  EXPORT_SYMBOL_GPL(kstrdup_quotable_file);
652