xref: /linux/lib/string_helpers.c (revision 005438a8eef063495ac059d128eea71b58de50e5)
1 /*
2  * Helpers for formatting and printing strings
3  *
4  * Copyright 31 August 2008 James Bottomley
5  * Copyright (C) 2013, Intel Corporation
6  */
7 #include <linux/bug.h>
8 #include <linux/kernel.h>
9 #include <linux/math64.h>
10 #include <linux/export.h>
11 #include <linux/ctype.h>
12 #include <linux/errno.h>
13 #include <linux/string.h>
14 #include <linux/string_helpers.h>
15 
16 /**
17  * string_get_size - get the size in the specified units
18  * @size:	The size to be converted in blocks
19  * @blk_size:	Size of the block (use 1 for size in bytes)
20  * @units:	units to use (powers of 1000 or 1024)
21  * @buf:	buffer to format to
22  * @len:	length of buffer
23  *
24  * This function returns a string formatted to 3 significant figures
25  * giving the size in the required units.  @buf should have room for
26  * at least 9 bytes and will always be zero terminated.
27  *
28  */
29 void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
30 		     char *buf, int len)
31 {
32 	static const char *const units_10[] = {
33 		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
34 	};
35 	static const char *const units_2[] = {
36 		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
37 	};
38 	static const char *const *const units_str[] = {
39 		[STRING_UNITS_10] = units_10,
40 		[STRING_UNITS_2] = units_2,
41 	};
42 	static const unsigned int divisor[] = {
43 		[STRING_UNITS_10] = 1000,
44 		[STRING_UNITS_2] = 1024,
45 	};
46 	int i, j;
47 	u32 remainder = 0, sf_cap, exp;
48 	char tmp[8];
49 	const char *unit;
50 
51 	tmp[0] = '\0';
52 	i = 0;
53 	if (!size)
54 		goto out;
55 
56 	while (blk_size >= divisor[units]) {
57 		remainder = do_div(blk_size, divisor[units]);
58 		i++;
59 	}
60 
61 	exp = divisor[units] / (u32)blk_size;
62 	if (size >= exp) {
63 		remainder = do_div(size, divisor[units]);
64 		remainder *= blk_size;
65 		i++;
66 	} else {
67 		remainder *= size;
68 	}
69 
70 	size *= blk_size;
71 	size += remainder / divisor[units];
72 	remainder %= divisor[units];
73 
74 	while (size >= divisor[units]) {
75 		remainder = do_div(size, divisor[units]);
76 		i++;
77 	}
78 
79 	sf_cap = size;
80 	for (j = 0; sf_cap*10 < 1000; j++)
81 		sf_cap *= 10;
82 
83 	if (j) {
84 		remainder *= 1000;
85 		remainder /= divisor[units];
86 		snprintf(tmp, sizeof(tmp), ".%03u", remainder);
87 		tmp[j+1] = '\0';
88 	}
89 
90  out:
91 	if (i >= ARRAY_SIZE(units_2))
92 		unit = "UNK";
93 	else
94 		unit = units_str[units][i];
95 
96 	snprintf(buf, len, "%u%s %s", (u32)size,
97 		 tmp, unit);
98 }
99 EXPORT_SYMBOL(string_get_size);
100 
/*
 * Decode a whitespace escape.  *src points at the character following the
 * backslash.  For one of n, r, t, v, f the matching control character is
 * stored at *dst and both cursors advance by one; for anything else
 * nothing is touched and false is returned.
 */
static bool unescape_space(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case 'n':
		decoded = '\n';
		break;
	case 'r':
		decoded = '\r';
		break;
	case 't':
		decoded = '\t';
		break;
	case 'v':
		decoded = '\v';
		break;
	case 'f':
		decoded = '\f';
		break;
	default:
		return false;
	}

	**dst = decoded;
	++*dst;
	++*src;
	return true;
}
128 
129 static bool unescape_octal(char **src, char **dst)
130 {
131 	char *p = *dst, *q = *src;
132 	u8 num;
133 
134 	if (isodigit(*q) == 0)
135 		return false;
136 
137 	num = (*q++) & 7;
138 	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
139 		num <<= 3;
140 		num += (*q++) & 7;
141 	}
142 	*p = num;
143 	*dst += 1;
144 	*src = q;
145 	return true;
146 }
147 
148 static bool unescape_hex(char **src, char **dst)
149 {
150 	char *p = *dst, *q = *src;
151 	int digit;
152 	u8 num;
153 
154 	if (*q++ != 'x')
155 		return false;
156 
157 	num = digit = hex_to_bin(*q++);
158 	if (digit < 0)
159 		return false;
160 
161 	digit = hex_to_bin(*q);
162 	if (digit >= 0) {
163 		q++;
164 		num = (num << 4) | digit;
165 	}
166 	*p = num;
167 	*dst += 1;
168 	*src = q;
169 	return true;
170 }
171 
/*
 * Decode one of the "special" escapes.  *src points at the character
 * following the backslash.  A double quote and a backslash stand for
 * themselves, 'a' becomes BEL and 'e' becomes ESC.  On a match the result
 * is stored at *dst and both cursors advance by one; otherwise nothing is
 * touched and false is returned.
 */
static bool unescape_special(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case '\"':
		decoded = '\"';
		break;
	case '\\':
		decoded = '\\';
		break;
	case 'a':
		decoded = '\a';
		break;
	case 'e':
		decoded = '\e';
		break;
	default:
		return false;
	}

	**dst = decoded;
	++*dst;
	++*src;
	return true;
}
196 
197 /**
198  * string_unescape - unquote characters in the given string
199  * @src:	source buffer (escaped)
200  * @dst:	destination buffer (unescaped)
201  * @size:	size of the destination buffer (0 to unlimit)
202  * @flags:	combination of the flags (bitwise OR):
203  *	%UNESCAPE_SPACE:
204  *		'\f' - form feed
205  *		'\n' - new line
206  *		'\r' - carriage return
207  *		'\t' - horizontal tab
208  *		'\v' - vertical tab
209  *	%UNESCAPE_OCTAL:
210  *		'\NNN' - byte with octal value NNN (1 to 3 digits)
211  *	%UNESCAPE_HEX:
212  *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
213  *	%UNESCAPE_SPECIAL:
214  *		'\"' - double quote
215  *		'\\' - backslash
216  *		'\a' - alert (BEL)
217  *		'\e' - escape
218  *	%UNESCAPE_ANY:
219  *		all previous together
220  *
221  * Description:
222  * The function unquotes characters in the given string.
223  *
224  * Because the size of the output will be the same as or less than the size of
225  * the input, the transformation may be performed in place.
226  *
227  * Caller must provide valid source and destination pointers. Be aware that
228  * destination buffer will always be NULL-terminated. Source string must be
229  * NULL-terminated as well.
230  *
231  * Return:
232  * The amount of the characters processed to the destination buffer excluding
233  * trailing '\0' is returned.
234  */
235 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
236 {
237 	char *out = dst;
238 
239 	while (*src && --size) {
240 		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
241 			src++;
242 			size--;
243 
244 			if (flags & UNESCAPE_SPACE &&
245 					unescape_space(&src, &out))
246 				continue;
247 
248 			if (flags & UNESCAPE_OCTAL &&
249 					unescape_octal(&src, &out))
250 				continue;
251 
252 			if (flags & UNESCAPE_HEX &&
253 					unescape_hex(&src, &out))
254 				continue;
255 
256 			if (flags & UNESCAPE_SPECIAL &&
257 					unescape_special(&src, &out))
258 				continue;
259 
260 			*out++ = '\\';
261 		}
262 		*out++ = *src++;
263 	}
264 	*out = '\0';
265 
266 	return out - dst;
267 }
268 EXPORT_SYMBOL(string_unescape);
269 
/*
 * Emit @c unchanged.  The byte is stored only while the cursor is below
 * @end, but the cursor always advances, so the caller can work out how
 * much room the full output would have needed.  Always returns true.
 */
static bool escape_passthrough(unsigned char c, char **dst, char *end)
{
	char *slot = (*dst)++;

	if (slot < end)
		*slot = c;
	return true;
}
279 
/*
 * Escape a whitespace control character as a backslash followed by one of
 * n, r, t, v, f.  Returns false without touching *dst when @c is not one
 * of these.  Bytes are stored only below @end; the cursor advances by two
 * regardless.
 */
static bool escape_space(unsigned char c, char **dst, char *end)
{
	char *base = *dst;
	unsigned char letter;

	switch (c) {
	case '\n':
		letter = 'n';
		break;
	case '\r':
		letter = 'r';
		break;
	case '\t':
		letter = 't';
		break;
	case '\v':
		letter = 'v';
		break;
	case '\f':
		letter = 'f';
		break;
	default:
		return false;
	}

	if (base < end)
		base[0] = '\\';
	if (base + 1 < end)
		base[1] = letter;

	*dst = base + 2;
	return true;
}
315 
/*
 * Escape a backslash, BEL or ESC as a backslash followed by '\\', 'a' or
 * 'e' respectively.  Returns false without touching *dst for any other
 * @c.  Bytes are stored only below @end; the cursor advances by two
 * regardless.
 */
static bool escape_special(unsigned char c, char **dst, char *end)
{
	char *base = *dst;
	unsigned char letter;

	switch (c) {
	case '\\':
		letter = '\\';
		break;
	case '\a':
		letter = 'a';
		break;
	case '\e':
		letter = 'e';
		break;
	default:
		return false;
	}

	if (base < end)
		base[0] = '\\';
	if (base + 1 < end)
		base[1] = letter;

	*dst = base + 2;
	return true;
}
345 
/*
 * Escape a zero byte as the two characters '\\' and '0'.  Returns false
 * without touching *dst when @c is non-zero.  Bytes are stored only below
 * @end; the cursor advances by two regardless.
 */
static bool escape_null(unsigned char c, char **dst, char *end)
{
	char *base = *dst;

	if (c != 0)
		return false;

	if (base < end)
		base[0] = '\\';
	if (base + 1 < end)
		base[1] = '0';

	*dst = base + 2;
	return true;
}
363 
/*
 * Escape @c as a backslash followed by exactly three octal digits, most
 * significant first.  Matches every byte value, so it always returns
 * true.  Bytes are stored only below @end; the cursor advances by four
 * regardless.
 */
static bool escape_octal(unsigned char c, char **dst, char *end)
{
	char *out = *dst;
	int shift;

	if (out < end)
		*out = '\\';
	++out;

	/* Three bits per digit: bits 7-6, then 5-3, then 2-0 */
	for (shift = 6; shift >= 0; shift -= 3) {
		if (out < end)
			*out = ((c >> shift) & 0x07) + '0';
		++out;
	}

	*dst = out;
	return true;
}
384 
/*
 * Escape @c as '\\', 'x' and two hexadecimal digits, high nibble first.
 * Matches every byte value, so it always returns true.  Bytes are stored
 * only below @end; the cursor advances by four regardless.
 */
static bool escape_hex(unsigned char c, char **dst, char *end)
{
	char *base = *dst;

	if (base < end)
		base[0] = '\\';
	if (base + 1 < end)
		base[1] = 'x';
	if (base + 2 < end)
		base[2] = hex_asc_hi(c);
	if (base + 3 < end)
		base[3] = hex_asc_lo(c);

	*dst = base + 4;
	return true;
}
405 
406 /**
407  * string_escape_mem - quote characters in the given memory buffer
408  * @src:	source buffer (unescaped)
409  * @isz:	source buffer size
410  * @dst:	destination buffer (escaped)
411  * @osz:	destination buffer size
412  * @flags:	combination of the flags (bitwise OR):
413  *	%ESCAPE_SPACE:
414  *		'\f' - form feed
415  *		'\n' - new line
416  *		'\r' - carriage return
417  *		'\t' - horizontal tab
418  *		'\v' - vertical tab
419  *	%ESCAPE_SPECIAL:
420  *		'\\' - backslash
421  *		'\a' - alert (BEL)
422  *		'\e' - escape
423  *	%ESCAPE_NULL:
424  *		'\0' - null
425  *	%ESCAPE_OCTAL:
426  *		'\NNN' - byte with octal value NNN (3 digits)
427  *	%ESCAPE_ANY:
428  *		all previous together
429  *	%ESCAPE_NP:
430  *		escape only non-printable characters (checked by isprint)
431  *	%ESCAPE_ANY_NP:
432  *		all previous together
433  *	%ESCAPE_HEX:
434  *		'\xHH' - byte with hexadecimal value HH (2 digits)
435  * @esc:	NULL-terminated string of characters any of which, if found in
436  *		the source, has to be escaped
437  *
438  * Description:
439  * The process of escaping byte buffer includes several parts. They are applied
440  * in the following sequence.
441  *	1. The character is matched to the printable class, if asked, and in
442  *	   case of match it passes through to the output.
443  *	2. The character is not matched to the one from @esc string and thus
444  *	   must go as is to the output.
445  *	3. The character is checked if it falls into the class given by @flags.
446  *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
447  *	   character. Note that they actually can't go together, otherwise
448  *	   %ESCAPE_HEX will be ignored.
449  *
450  * Caller must provide valid source and destination pointers. Be aware that
451  * destination buffer will not be NULL-terminated, thus caller have to append
452  * it if needs.
453  *
454  * Return:
455  * The total size of the escaped output that would be generated for
456  * the given input and flags. To check whether the output was
457  * truncated, compare the return value to osz. There is room left in
458  * dst for a '\0' terminator if and only if ret < osz.
459  */
460 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
461 		      unsigned int flags, const char *esc)
462 {
463 	char *p = dst;
464 	char *end = p + osz;
465 	bool is_dict = esc && *esc;
466 
467 	while (isz--) {
468 		unsigned char c = *src++;
469 
470 		/*
471 		 * Apply rules in the following sequence:
472 		 *	- the character is printable, when @flags has
473 		 *	  %ESCAPE_NP bit set
474 		 *	- the @esc string is supplied and does not contain a
475 		 *	  character under question
476 		 *	- the character doesn't fall into a class of symbols
477 		 *	  defined by given @flags
478 		 * In these cases we just pass through a character to the
479 		 * output buffer.
480 		 */
481 		if ((flags & ESCAPE_NP && isprint(c)) ||
482 		    (is_dict && !strchr(esc, c))) {
483 			/* do nothing */
484 		} else {
485 			if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
486 				continue;
487 
488 			if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
489 				continue;
490 
491 			if (flags & ESCAPE_NULL && escape_null(c, &p, end))
492 				continue;
493 
494 			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
495 			if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
496 				continue;
497 
498 			if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
499 				continue;
500 		}
501 
502 		escape_passthrough(c, &p, end);
503 	}
504 
505 	return p - dst;
506 }
507 EXPORT_SYMBOL(string_escape_mem);
508