xref: /linux/lib/string_helpers.c (revision b85d45947951d23cb22d90caecf4c1eb81342c96)
1 /*
2  * Helpers for formatting and printing strings
3  *
4  * Copyright 31 August 2008 James Bottomley
5  * Copyright (C) 2013, Intel Corporation
6  */
7 #include <linux/bug.h>
8 #include <linux/kernel.h>
9 #include <linux/math64.h>
10 #include <linux/export.h>
11 #include <linux/ctype.h>
12 #include <linux/errno.h>
13 #include <linux/string.h>
14 #include <linux/string_helpers.h>
15 
16 /**
17  * string_get_size - get the size in the specified units
18  * @size:	The size to be converted in blocks
19  * @blk_size:	Size of the block (use 1 for size in bytes)
20  * @units:	units to use (powers of 1000 or 1024)
21  * @buf:	buffer to format to
22  * @len:	length of buffer
23  *
24  * This function returns a string formatted to 3 significant figures
25  * giving the size in the required units.  @buf should have room for
26  * at least 9 bytes and will always be zero terminated.
27  *
28  */
29 void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
30 		     char *buf, int len)
31 {
32 	static const char *const units_10[] = {
33 		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
34 	};
35 	static const char *const units_2[] = {
36 		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
37 	};
38 	static const char *const *const units_str[] = {
39 		[STRING_UNITS_10] = units_10,
40 		[STRING_UNITS_2] = units_2,
41 	};
42 	static const unsigned int divisor[] = {
43 		[STRING_UNITS_10] = 1000,
44 		[STRING_UNITS_2] = 1024,
45 	};
46 	int i, j;
47 	u32 remainder = 0, sf_cap, exp;
48 	char tmp[8];
49 	const char *unit;
50 
51 	tmp[0] = '\0';
52 	i = 0;
53 	if (!size)
54 		goto out;
55 
56 	while (blk_size >= divisor[units]) {
57 		remainder = do_div(blk_size, divisor[units]);
58 		i++;
59 	}
60 
61 	exp = divisor[units] / (u32)blk_size;
62 	/*
63 	 * size must be strictly greater than exp here to ensure that remainder
64 	 * is greater than divisor[units] coming out of the if below.
65 	 */
66 	if (size > exp) {
67 		remainder = do_div(size, divisor[units]);
68 		remainder *= blk_size;
69 		i++;
70 	} else {
71 		remainder *= size;
72 	}
73 
74 	size *= blk_size;
75 	size += remainder / divisor[units];
76 	remainder %= divisor[units];
77 
78 	while (size >= divisor[units]) {
79 		remainder = do_div(size, divisor[units]);
80 		i++;
81 	}
82 
83 	sf_cap = size;
84 	for (j = 0; sf_cap*10 < 1000; j++)
85 		sf_cap *= 10;
86 
87 	if (j) {
88 		remainder *= 1000;
89 		remainder /= divisor[units];
90 		snprintf(tmp, sizeof(tmp), ".%03u", remainder);
91 		tmp[j+1] = '\0';
92 	}
93 
94  out:
95 	if (i >= ARRAY_SIZE(units_2))
96 		unit = "UNK";
97 	else
98 		unit = units_str[units][i];
99 
100 	snprintf(buf, len, "%u%s %s", (u32)size,
101 		 tmp, unit);
102 }
103 EXPORT_SYMBOL(string_get_size);
104 
/*
 * Decode one white-space escape letter (the character following a
 * backslash) at *src into the matching control character at *dst.
 * On success both cursors advance by one and true is returned; for any
 * other letter nothing is touched and false is returned.
 */
static bool unescape_space(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case 'f':
		decoded = '\f';
		break;
	case 'n':
		decoded = '\n';
		break;
	case 'r':
		decoded = '\r';
		break;
	case 't':
		decoded = '\t';
		break;
	case 'v':
		decoded = '\v';
		break;
	default:
		return false;
	}

	**dst = decoded;
	(*dst)++;
	(*src)++;
	return true;
}
132 
133 static bool unescape_octal(char **src, char **dst)
134 {
135 	char *p = *dst, *q = *src;
136 	u8 num;
137 
138 	if (isodigit(*q) == 0)
139 		return false;
140 
141 	num = (*q++) & 7;
142 	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
143 		num <<= 3;
144 		num += (*q++) & 7;
145 	}
146 	*p = num;
147 	*dst += 1;
148 	*src = q;
149 	return true;
150 }
151 
152 static bool unescape_hex(char **src, char **dst)
153 {
154 	char *p = *dst, *q = *src;
155 	int digit;
156 	u8 num;
157 
158 	if (*q++ != 'x')
159 		return false;
160 
161 	num = digit = hex_to_bin(*q++);
162 	if (digit < 0)
163 		return false;
164 
165 	digit = hex_to_bin(*q);
166 	if (digit >= 0) {
167 		q++;
168 		num = (num << 4) | digit;
169 	}
170 	*p = num;
171 	*dst += 1;
172 	*src = q;
173 	return true;
174 }
175 
/*
 * Decode one "special" escape letter at *src (double quote, backslash,
 * 'a' for alert, 'e' for escape) into the matching character at *dst.
 * On success both cursors advance by one and true is returned; otherwise
 * nothing is touched and false is returned.
 */
static bool unescape_special(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case 'a':
		decoded = '\a';
		break;
	case 'e':
		decoded = '\e';
		break;
	case '\\':
		decoded = '\\';
		break;
	case '\"':
		decoded = '\"';
		break;
	default:
		return false;
	}

	**dst = decoded;
	(*dst)++;
	(*src)++;
	return true;
}
200 
201 /**
202  * string_unescape - unquote characters in the given string
203  * @src:	source buffer (escaped)
204  * @dst:	destination buffer (unescaped)
205  * @size:	size of the destination buffer (0 to unlimit)
206  * @flags:	combination of the flags (bitwise OR):
207  *	%UNESCAPE_SPACE:
208  *		'\f' - form feed
209  *		'\n' - new line
210  *		'\r' - carriage return
211  *		'\t' - horizontal tab
212  *		'\v' - vertical tab
213  *	%UNESCAPE_OCTAL:
214  *		'\NNN' - byte with octal value NNN (1 to 3 digits)
215  *	%UNESCAPE_HEX:
216  *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
217  *	%UNESCAPE_SPECIAL:
218  *		'\"' - double quote
219  *		'\\' - backslash
220  *		'\a' - alert (BEL)
221  *		'\e' - escape
222  *	%UNESCAPE_ANY:
223  *		all previous together
224  *
225  * Description:
226  * The function unquotes characters in the given string.
227  *
228  * Because the size of the output will be the same as or less than the size of
229  * the input, the transformation may be performed in place.
230  *
231  * Caller must provide valid source and destination pointers. Be aware that
232  * destination buffer will always be NULL-terminated. Source string must be
233  * NULL-terminated as well.
234  *
235  * Return:
236  * The amount of the characters processed to the destination buffer excluding
237  * trailing '\0' is returned.
238  */
239 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
240 {
241 	char *out = dst;
242 
243 	while (*src && --size) {
244 		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
245 			src++;
246 			size--;
247 
248 			if (flags & UNESCAPE_SPACE &&
249 					unescape_space(&src, &out))
250 				continue;
251 
252 			if (flags & UNESCAPE_OCTAL &&
253 					unescape_octal(&src, &out))
254 				continue;
255 
256 			if (flags & UNESCAPE_HEX &&
257 					unescape_hex(&src, &out))
258 				continue;
259 
260 			if (flags & UNESCAPE_SPECIAL &&
261 					unescape_special(&src, &out))
262 				continue;
263 
264 			*out++ = '\\';
265 		}
266 		*out++ = *src++;
267 	}
268 	*out = '\0';
269 
270 	return out - dst;
271 }
272 EXPORT_SYMBOL(string_unescape);
273 
/*
 * Emit @c unchanged.  The byte is stored only when the cursor is still
 * below @end, but *dst always advances so that the caller can work out
 * how much room the complete output would need.  Always returns true.
 */
static bool escape_passthrough(unsigned char c, char **dst, char *end)
{
	char *pos = (*dst)++;

	if (pos < end)
		*pos = c;
	return true;
}
283 
/*
 * Emit a white-space control character (newline, carriage return, tab,
 * vertical tab, form feed) as a backslash followed by its letter.
 * Returns false without touching *dst for any other character.  Bytes at
 * or beyond @end are not stored, but *dst still advances past them.
 */
static bool escape_space(unsigned char c, char **dst, char *end)
{
	char seq[2] = { '\\', 0 };
	char *pos = *dst;
	int k;

	switch (c) {
	case '\f':
		seq[1] = 'f';
		break;
	case '\n':
		seq[1] = 'n';
		break;
	case '\r':
		seq[1] = 'r';
		break;
	case '\t':
		seq[1] = 't';
		break;
	case '\v':
		seq[1] = 'v';
		break;
	default:
		return false;
	}

	for (k = 0; k < 2; k++, pos++) {
		if (pos < end)
			*pos = seq[k];
	}

	*dst = pos;
	return true;
}
319 
/*
 * Emit a backslash, alert (BEL) or escape character as a backslash
 * followed by '\\', 'a' or 'e' respectively.  Returns false without
 * touching *dst for any other character.  Bytes at or beyond @end are not
 * stored, but *dst still advances past them.
 */
static bool escape_special(unsigned char c, char **dst, char *end)
{
	char seq[2] = { '\\', 0 };
	char *pos = *dst;
	int k;

	switch (c) {
	case '\a':
		seq[1] = 'a';
		break;
	case '\e':
		seq[1] = 'e';
		break;
	case '\\':
		seq[1] = '\\';
		break;
	default:
		return false;
	}

	for (k = 0; k < 2; k++, pos++) {
		if (pos < end)
			*pos = seq[k];
	}

	*dst = pos;
	return true;
}
349 
/*
 * Emit a zero byte as the two characters backslash and '0'.  Returns false
 * without touching *dst for any non-zero @c.  Bytes at or beyond @end are
 * not stored, but *dst still advances past them.
 */
static bool escape_null(unsigned char c, char **dst, char *end)
{
	static const char seq[2] = { '\\', '0' };
	char *pos;
	int k;

	if (c != 0)
		return false;

	pos = *dst;
	for (k = 0; k < 2; k++, pos++) {
		if (pos < end)
			*pos = seq[k];
	}

	*dst = pos;
	return true;
}
367 
/*
 * Emit @c as a backslash followed by exactly three octal digits, most
 * significant first.  Always returns true.  Bytes at or beyond @end are
 * not stored, but *dst still advances past them.
 */
static bool escape_octal(unsigned char c, char **dst, char *end)
{
	char *pos = *dst;
	int shift;

	if (pos < end)
		*pos = '\\';
	pos++;

	for (shift = 6; shift >= 0; shift -= 3, pos++) {
		if (pos < end)
			*pos = ((c >> shift) & 0x07) + '0';
	}

	*dst = pos;
	return true;
}
388 
/*
 * Emit @c as a backslash, an 'x' and two hexadecimal digits (high nibble
 * first).  Always returns true.  Bytes at or beyond @end are not stored,
 * but *dst still advances past them.
 */
static bool escape_hex(unsigned char c, char **dst, char *end)
{
	const char seq[4] = { '\\', 'x', hex_asc_hi(c), hex_asc_lo(c) };
	char *pos = *dst;
	int k;

	for (k = 0; k < 4; k++, pos++) {
		if (pos < end)
			*pos = seq[k];
	}

	*dst = pos;
	return true;
}
409 
410 /**
411  * string_escape_mem - quote characters in the given memory buffer
412  * @src:	source buffer (unescaped)
413  * @isz:	source buffer size
414  * @dst:	destination buffer (escaped)
415  * @osz:	destination buffer size
416  * @flags:	combination of the flags (bitwise OR):
417  *	%ESCAPE_SPACE: (special white space, not space itself)
418  *		'\f' - form feed
419  *		'\n' - new line
420  *		'\r' - carriage return
421  *		'\t' - horizontal tab
422  *		'\v' - vertical tab
423  *	%ESCAPE_SPECIAL:
424  *		'\\' - backslash
425  *		'\a' - alert (BEL)
426  *		'\e' - escape
427  *	%ESCAPE_NULL:
428  *		'\0' - null
429  *	%ESCAPE_OCTAL:
430  *		'\NNN' - byte with octal value NNN (3 digits)
431  *	%ESCAPE_ANY:
432  *		all previous together
433  *	%ESCAPE_NP:
434  *		escape only non-printable characters (checked by isprint)
435  *	%ESCAPE_ANY_NP:
436  *		all previous together
437  *	%ESCAPE_HEX:
438  *		'\xHH' - byte with hexadecimal value HH (2 digits)
439  * @only:	NULL-terminated string containing characters used to limit
440  *		the selected escape class. If characters are included in @only
441  *		that would not normally be escaped by the classes selected
442  *		in @flags, they will be copied to @dst unescaped.
443  *
444  * Description:
445  * The process of escaping byte buffer includes several parts. They are applied
446  * in the following sequence.
447  *	1. The character is matched to the printable class, if asked, and in
448  *	   case of match it passes through to the output.
449  *	2. The character is not matched to the one from @only string and thus
450  *	   must go as-is to the output.
451  *	3. The character is checked if it falls into the class given by @flags.
452  *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
453  *	   character. Note that they actually can't go together, otherwise
454  *	   %ESCAPE_HEX will be ignored.
455  *
456  * Caller must provide valid source and destination pointers. Be aware that
457  * destination buffer will not be NULL-terminated, thus caller have to append
458  * it if needs.
459  *
460  * Return:
461  * The total size of the escaped output that would be generated for
462  * the given input and flags. To check whether the output was
463  * truncated, compare the return value to osz. There is room left in
464  * dst for a '\0' terminator if and only if ret < osz.
465  */
466 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
467 		      unsigned int flags, const char *only)
468 {
469 	char *p = dst;
470 	char *end = p + osz;
471 	bool is_dict = only && *only;
472 
473 	while (isz--) {
474 		unsigned char c = *src++;
475 
476 		/*
477 		 * Apply rules in the following sequence:
478 		 *	- the character is printable, when @flags has
479 		 *	  %ESCAPE_NP bit set
480 		 *	- the @only string is supplied and does not contain a
481 		 *	  character under question
482 		 *	- the character doesn't fall into a class of symbols
483 		 *	  defined by given @flags
484 		 * In these cases we just pass through a character to the
485 		 * output buffer.
486 		 */
487 		if ((flags & ESCAPE_NP && isprint(c)) ||
488 		    (is_dict && !strchr(only, c))) {
489 			/* do nothing */
490 		} else {
491 			if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
492 				continue;
493 
494 			if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
495 				continue;
496 
497 			if (flags & ESCAPE_NULL && escape_null(c, &p, end))
498 				continue;
499 
500 			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
501 			if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
502 				continue;
503 
504 			if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
505 				continue;
506 		}
507 
508 		escape_passthrough(c, &p, end);
509 	}
510 
511 	return p - dst;
512 }
513 EXPORT_SYMBOL(string_escape_mem);
514