xref: /linux/lib/string_helpers.c (revision 8c749ce93ee69e789e46b3be98de9e0cbfcf8ed8)
1 /*
2  * Helpers for formatting and printing strings
3  *
4  * Copyright 31 August 2008 James Bottomley
5  * Copyright (C) 2013, Intel Corporation
6  */
7 #include <linux/bug.h>
8 #include <linux/kernel.h>
9 #include <linux/math64.h>
10 #include <linux/export.h>
11 #include <linux/ctype.h>
12 #include <linux/errno.h>
13 #include <linux/string.h>
14 #include <linux/string_helpers.h>
15 
16 /**
17  * string_get_size - get the size in the specified units
18  * @size:	The size to be converted in blocks
19  * @blk_size:	Size of the block (use 1 for size in bytes)
20  * @units:	units to use (powers of 1000 or 1024)
21  * @buf:	buffer to format to
22  * @len:	length of buffer
23  *
24  * This function returns a string formatted to 3 significant figures
25  * giving the size in the required units.  @buf should have room for
26  * at least 9 bytes and will always be zero terminated.
27  *
28  */
29 void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
30 		     char *buf, int len)
31 {
32 	static const char *const units_10[] = {
33 		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
34 	};
35 	static const char *const units_2[] = {
36 		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
37 	};
38 	static const char *const *const units_str[] = {
39 		[STRING_UNITS_10] = units_10,
40 		[STRING_UNITS_2] = units_2,
41 	};
42 	static const unsigned int divisor[] = {
43 		[STRING_UNITS_10] = 1000,
44 		[STRING_UNITS_2] = 1024,
45 	};
46 	static const unsigned int rounding[] = { 500, 50, 5 };
47 	int i = 0, j;
48 	u32 remainder = 0, sf_cap;
49 	char tmp[8];
50 	const char *unit;
51 
52 	tmp[0] = '\0';
53 
54 	if (blk_size == 0)
55 		size = 0;
56 	if (size == 0)
57 		goto out;
58 
59 	/* This is Napier's algorithm.  Reduce the original block size to
60 	 *
61 	 * coefficient * divisor[units]^i
62 	 *
63 	 * we do the reduction so both coefficients are just under 32 bits so
64 	 * that multiplying them together won't overflow 64 bits and we keep
65 	 * as much precision as possible in the numbers.
66 	 *
67 	 * Note: it's safe to throw away the remainders here because all the
68 	 * precision is in the coefficients.
69 	 */
70 	while (blk_size >> 32) {
71 		do_div(blk_size, divisor[units]);
72 		i++;
73 	}
74 
75 	while (size >> 32) {
76 		do_div(size, divisor[units]);
77 		i++;
78 	}
79 
80 	/* now perform the actual multiplication keeping i as the sum of the
81 	 * two logarithms */
82 	size *= blk_size;
83 
84 	/* and logarithmically reduce it until it's just under the divisor */
85 	while (size >= divisor[units]) {
86 		remainder = do_div(size, divisor[units]);
87 		i++;
88 	}
89 
90 	/* work out in j how many digits of precision we need from the
91 	 * remainder */
92 	sf_cap = size;
93 	for (j = 0; sf_cap*10 < 1000; j++)
94 		sf_cap *= 10;
95 
96 	if (units == STRING_UNITS_2) {
97 		/* express the remainder as a decimal.  It's currently the
98 		 * numerator of a fraction whose denominator is
99 		 * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
100 		remainder *= 1000;
101 		remainder >>= 10;
102 	}
103 
104 	/* add a 5 to the digit below what will be printed to ensure
105 	 * an arithmetical round up and carry it through to size */
106 	remainder += rounding[j];
107 	if (remainder >= 1000) {
108 		remainder -= 1000;
109 		size += 1;
110 	}
111 
112 	if (j) {
113 		snprintf(tmp, sizeof(tmp), ".%03u", remainder);
114 		tmp[j+1] = '\0';
115 	}
116 
117  out:
118 	if (i >= ARRAY_SIZE(units_2))
119 		unit = "UNK";
120 	else
121 		unit = units_str[units][i];
122 
123 	snprintf(buf, len, "%u%s %s", (u32)size,
124 		 tmp, unit);
125 }
126 EXPORT_SYMBOL(string_get_size);
127 
/*
 * Decode a white-space escape.  *src points at the character following the
 * backslash.  On a match ('n', 'r', 't', 'v' or 'f') the corresponding
 * control character is stored at *dst, both cursors advance by one and
 * true is returned.  Otherwise nothing is touched and false is returned.
 */
static bool unescape_space(char **src, char **dst)
{
	static const char letters[] = { 'n', 'r', 't', 'v', 'f' };
	static const char decoded[] = { '\n', '\r', '\t', '\v', '\f' };
	unsigned int k;

	for (k = 0; k < sizeof(letters); k++) {
		if (**src != letters[k])
			continue;

		**dst = decoded[k];
		++*dst;
		++*src;
		return true;
	}

	return false;
}
155 
156 static bool unescape_octal(char **src, char **dst)
157 {
158 	char *p = *dst, *q = *src;
159 	u8 num;
160 
161 	if (isodigit(*q) == 0)
162 		return false;
163 
164 	num = (*q++) & 7;
165 	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
166 		num <<= 3;
167 		num += (*q++) & 7;
168 	}
169 	*p = num;
170 	*dst += 1;
171 	*src = q;
172 	return true;
173 }
174 
175 static bool unescape_hex(char **src, char **dst)
176 {
177 	char *p = *dst, *q = *src;
178 	int digit;
179 	u8 num;
180 
181 	if (*q++ != 'x')
182 		return false;
183 
184 	num = digit = hex_to_bin(*q++);
185 	if (digit < 0)
186 		return false;
187 
188 	digit = hex_to_bin(*q);
189 	if (digit >= 0) {
190 		q++;
191 		num = (num << 4) | digit;
192 	}
193 	*p = num;
194 	*dst += 1;
195 	*src = q;
196 	return true;
197 }
198 
/*
 * Decode a "special" escape.  *src points at the character following the
 * backslash.  A double quote and a backslash decode to themselves, 'a' to
 * alert (BEL) and 'e' to escape.  On a match the result is stored at
 * *dst, both cursors advance by one and true is returned.  Otherwise
 * nothing is touched and false is returned.
 */
static bool unescape_special(char **src, char **dst)
{
	static const char letters[] = { '\"', '\\', 'a', 'e' };
	static const char decoded[] = { '\"', '\\', '\a', '\e' };
	unsigned int k;

	for (k = 0; k < sizeof(letters); k++) {
		if (**src != letters[k])
			continue;

		**dst = decoded[k];
		++*dst;
		++*src;
		return true;
	}

	return false;
}
223 
224 /**
225  * string_unescape - unquote characters in the given string
226  * @src:	source buffer (escaped)
227  * @dst:	destination buffer (unescaped)
228  * @size:	size of the destination buffer (0 to unlimit)
229  * @flags:	combination of the flags (bitwise OR):
230  *	%UNESCAPE_SPACE:
231  *		'\f' - form feed
232  *		'\n' - new line
233  *		'\r' - carriage return
234  *		'\t' - horizontal tab
235  *		'\v' - vertical tab
236  *	%UNESCAPE_OCTAL:
237  *		'\NNN' - byte with octal value NNN (1 to 3 digits)
238  *	%UNESCAPE_HEX:
239  *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
240  *	%UNESCAPE_SPECIAL:
241  *		'\"' - double quote
242  *		'\\' - backslash
243  *		'\a' - alert (BEL)
244  *		'\e' - escape
245  *	%UNESCAPE_ANY:
246  *		all previous together
247  *
248  * Description:
249  * The function unquotes characters in the given string.
250  *
251  * Because the size of the output will be the same as or less than the size of
252  * the input, the transformation may be performed in place.
253  *
254  * Caller must provide valid source and destination pointers. Be aware that
255  * destination buffer will always be NULL-terminated. Source string must be
256  * NULL-terminated as well.
257  *
258  * Return:
259  * The amount of the characters processed to the destination buffer excluding
260  * trailing '\0' is returned.
261  */
262 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
263 {
264 	char *out = dst;
265 
266 	while (*src && --size) {
267 		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
268 			src++;
269 			size--;
270 
271 			if (flags & UNESCAPE_SPACE &&
272 					unescape_space(&src, &out))
273 				continue;
274 
275 			if (flags & UNESCAPE_OCTAL &&
276 					unescape_octal(&src, &out))
277 				continue;
278 
279 			if (flags & UNESCAPE_HEX &&
280 					unescape_hex(&src, &out))
281 				continue;
282 
283 			if (flags & UNESCAPE_SPECIAL &&
284 					unescape_special(&src, &out))
285 				continue;
286 
287 			*out++ = '\\';
288 		}
289 		*out++ = *src++;
290 	}
291 	*out = '\0';
292 
293 	return out - dst;
294 }
295 EXPORT_SYMBOL(string_unescape);
296 
/*
 * Copy @c to the output unchanged.  The byte is stored only if *dst is
 * still below @end, but *dst always advances by one so the caller can
 * compute the length the full output would have had.  Always returns true.
 */
static bool escape_passthrough(unsigned char c, char **dst, char *end)
{
	if (*dst < end)
		**dst = c;

	++*dst;
	return true;
}
306 
/*
 * Escape a white-space control character (new line, carriage return,
 * horizontal tab, vertical tab, form feed) as a backslash followed by its
 * letter.  Returns false, leaving *dst alone, if @c is none of those.
 * Bytes are stored only below @end, but *dst always advances by two on
 * success.
 */
static bool escape_space(unsigned char c, char **dst, char *end)
{
	static const char raw[] = { '\n', '\r', '\t', '\v', '\f' };
	static const char letter[] = { 'n', 'r', 't', 'v', 'f' };
	char seq[2] = { '\\', 0 };
	char *out = *dst;
	unsigned int k;

	for (k = 0; k < sizeof(raw); k++) {
		if (c == (unsigned char)raw[k])
			break;
	}
	if (k == sizeof(raw))
		return false;

	seq[1] = letter[k];
	for (k = 0; k < sizeof(seq); k++, out++) {
		if (out < end)
			*out = seq[k];
	}

	*dst = out;
	return true;
}
342 
/*
 * Escape a backslash, alert (BEL) or escape character as a backslash
 * followed by '\\', 'a' or 'e' respectively.  Returns false, leaving *dst
 * alone, for any other @c.  Bytes are stored only below @end, but *dst
 * always advances by two on success.
 */
static bool escape_special(unsigned char c, char **dst, char *end)
{
	static const char raw[] = { '\\', '\a', '\e' };
	static const char letter[] = { '\\', 'a', 'e' };
	char seq[2] = { '\\', 0 };
	char *out = *dst;
	unsigned int k;

	for (k = 0; k < sizeof(raw); k++) {
		if (c == (unsigned char)raw[k])
			break;
	}
	if (k == sizeof(raw))
		return false;

	seq[1] = letter[k];
	for (k = 0; k < sizeof(seq); k++, out++) {
		if (out < end)
			*out = seq[k];
	}

	*dst = out;
	return true;
}
372 
/*
 * Escape a zero byte as the two characters '\\' and '0'.  Returns false,
 * leaving *dst alone, for any non-zero @c.  Bytes are stored only below
 * @end, but *dst always advances by two on success.
 */
static bool escape_null(unsigned char c, char **dst, char *end)
{
	static const char seq[] = { '\\', '0' };
	char *out;
	unsigned int k;

	if (c != 0)
		return false;

	out = *dst;
	for (k = 0; k < sizeof(seq); k++, out++) {
		if (out < end)
			*out = seq[k];
	}

	*dst = out;
	return true;
}
390 
/*
 * Escape @c as a backslash followed by exactly three octal digits.
 * Bytes are stored only below @end, but *dst always advances by four.
 * Always returns true, since any byte value can be written this way.
 */
static bool escape_octal(unsigned char c, char **dst, char *end)
{
	const char seq[] = {
		'\\',
		((c >> 6) & 0x07) + '0',
		((c >> 3) & 0x07) + '0',
		((c >> 0) & 0x07) + '0',
	};
	char *out = *dst;
	unsigned int k;

	for (k = 0; k < sizeof(seq); k++, out++) {
		if (out < end)
			*out = seq[k];
	}

	*dst = out;
	return true;
}
411 
/*
 * Escape @c as a backslash, an 'x' and exactly two hexadecimal digits
 * (high nibble first).  Bytes are stored only below @end, but *dst always
 * advances by four.  Always returns true, since any byte value can be
 * written this way.
 */
static bool escape_hex(unsigned char c, char **dst, char *end)
{
	const char seq[] = { '\\', 'x', hex_asc_hi(c), hex_asc_lo(c) };
	char *out = *dst;
	unsigned int k;

	for (k = 0; k < sizeof(seq); k++, out++) {
		if (out < end)
			*out = seq[k];
	}

	*dst = out;
	return true;
}
432 
433 /**
434  * string_escape_mem - quote characters in the given memory buffer
435  * @src:	source buffer (unescaped)
436  * @isz:	source buffer size
437  * @dst:	destination buffer (escaped)
438  * @osz:	destination buffer size
439  * @flags:	combination of the flags (bitwise OR):
440  *	%ESCAPE_SPACE: (special white space, not space itself)
441  *		'\f' - form feed
442  *		'\n' - new line
443  *		'\r' - carriage return
444  *		'\t' - horizontal tab
445  *		'\v' - vertical tab
446  *	%ESCAPE_SPECIAL:
447  *		'\\' - backslash
448  *		'\a' - alert (BEL)
449  *		'\e' - escape
450  *	%ESCAPE_NULL:
451  *		'\0' - null
452  *	%ESCAPE_OCTAL:
453  *		'\NNN' - byte with octal value NNN (3 digits)
454  *	%ESCAPE_ANY:
455  *		all previous together
456  *	%ESCAPE_NP:
457  *		escape only non-printable characters (checked by isprint)
458  *	%ESCAPE_ANY_NP:
459  *		all previous together
460  *	%ESCAPE_HEX:
461  *		'\xHH' - byte with hexadecimal value HH (2 digits)
462  * @only:	NULL-terminated string containing characters used to limit
463  *		the selected escape class. If characters are included in @only
464  *		that would not normally be escaped by the classes selected
465  *		in @flags, they will be copied to @dst unescaped.
466  *
467  * Description:
468  * The process of escaping byte buffer includes several parts. They are applied
469  * in the following sequence.
470  *	1. The character is matched to the printable class, if asked, and in
471  *	   case of match it passes through to the output.
472  *	2. The character is not matched to the one from @only string and thus
473  *	   must go as-is to the output.
474  *	3. The character is checked if it falls into the class given by @flags.
475  *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
476  *	   character. Note that they actually can't go together, otherwise
477  *	   %ESCAPE_HEX will be ignored.
478  *
479  * Caller must provide valid source and destination pointers. Be aware that
480  * destination buffer will not be NULL-terminated, thus caller have to append
481  * it if needs.
482  *
483  * Return:
484  * The total size of the escaped output that would be generated for
485  * the given input and flags. To check whether the output was
486  * truncated, compare the return value to osz. There is room left in
487  * dst for a '\0' terminator if and only if ret < osz.
488  */
489 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
490 		      unsigned int flags, const char *only)
491 {
492 	char *p = dst;
493 	char *end = p + osz;
494 	bool is_dict = only && *only;
495 
496 	while (isz--) {
497 		unsigned char c = *src++;
498 
499 		/*
500 		 * Apply rules in the following sequence:
501 		 *	- the character is printable, when @flags has
502 		 *	  %ESCAPE_NP bit set
503 		 *	- the @only string is supplied and does not contain a
504 		 *	  character under question
505 		 *	- the character doesn't fall into a class of symbols
506 		 *	  defined by given @flags
507 		 * In these cases we just pass through a character to the
508 		 * output buffer.
509 		 */
510 		if ((flags & ESCAPE_NP && isprint(c)) ||
511 		    (is_dict && !strchr(only, c))) {
512 			/* do nothing */
513 		} else {
514 			if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
515 				continue;
516 
517 			if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
518 				continue;
519 
520 			if (flags & ESCAPE_NULL && escape_null(c, &p, end))
521 				continue;
522 
523 			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
524 			if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
525 				continue;
526 
527 			if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
528 				continue;
529 		}
530 
531 		escape_passthrough(c, &p, end);
532 	}
533 
534 	return p - dst;
535 }
536 EXPORT_SYMBOL(string_escape_mem);
537