xref: /linux/lib/string_helpers.c (revision 4949009eb8d40a441dcddcd96e101e77d31cf1b2)
1 /*
2  * Helpers for formatting and printing strings
3  *
4  * Copyright 31 August 2008 James Bottomley
5  * Copyright (C) 2013, Intel Corporation
6  */
7 #include <linux/kernel.h>
8 #include <linux/math64.h>
9 #include <linux/export.h>
10 #include <linux/ctype.h>
11 #include <linux/errno.h>
12 #include <linux/string.h>
13 #include <linux/string_helpers.h>
14 
15 /**
16  * string_get_size - get the size in the specified units
17  * @size:	The size to be converted
18  * @units:	units to use (powers of 1000 or 1024)
19  * @buf:	buffer to format to
20  * @len:	length of buffer
21  *
22  * This function returns a string formatted to 3 significant figures
23  * giving the size in the required units.  Returns 0 on success or
24  * error on failure.  @buf is always zero terminated.
25  *
26  */
27 int string_get_size(u64 size, const enum string_size_units units,
28 		    char *buf, int len)
29 {
30 	static const char *const units_10[] = {
31 		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
32 	};
33 	static const char *const units_2[] = {
34 		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
35 		NULL
36 	};
37 	static const char *const *const units_str[] = {
38 		[STRING_UNITS_10] = units_10,
39 		[STRING_UNITS_2] = units_2,
40 	};
41 	static const unsigned int divisor[] = {
42 		[STRING_UNITS_10] = 1000,
43 		[STRING_UNITS_2] = 1024,
44 	};
45 	int i, j;
46 	u64 remainder = 0, sf_cap;
47 	char tmp[8];
48 
49 	tmp[0] = '\0';
50 	i = 0;
51 	if (size >= divisor[units]) {
52 		while (size >= divisor[units] && units_str[units][i]) {
53 			remainder = do_div(size, divisor[units]);
54 			i++;
55 		}
56 
57 		sf_cap = size;
58 		for (j = 0; sf_cap*10 < 1000; j++)
59 			sf_cap *= 10;
60 
61 		if (j) {
62 			remainder *= 1000;
63 			do_div(remainder, divisor[units]);
64 			snprintf(tmp, sizeof(tmp), ".%03lld",
65 				 (unsigned long long)remainder);
66 			tmp[j+1] = '\0';
67 		}
68 	}
69 
70 	snprintf(buf, len, "%lld%s %s", (unsigned long long)size,
71 		 tmp, units_str[units][i]);
72 
73 	return 0;
74 }
75 EXPORT_SYMBOL(string_get_size);
76 
/*
 * Decode a whitespace escape letter (n, r, t, v, f).
 *
 * *src points at the escape letter; string_unescape() has already stepped
 * over the backslash.  On a match the decoded control character is stored
 * at *dst, both cursors advance by one and true is returned.  Otherwise
 * nothing is touched and false is returned.
 */
static bool unescape_space(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case 'n':
		decoded = '\n';
		break;
	case 'r':
		decoded = '\r';
		break;
	case 't':
		decoded = '\t';
		break;
	case 'v':
		decoded = '\v';
		break;
	case 'f':
		decoded = '\f';
		break;
	default:
		return false;
	}

	**dst = decoded;
	(*dst)++;
	(*src)++;
	return true;
}
104 
105 static bool unescape_octal(char **src, char **dst)
106 {
107 	char *p = *dst, *q = *src;
108 	u8 num;
109 
110 	if (isodigit(*q) == 0)
111 		return false;
112 
113 	num = (*q++) & 7;
114 	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
115 		num <<= 3;
116 		num += (*q++) & 7;
117 	}
118 	*p = num;
119 	*dst += 1;
120 	*src = q;
121 	return true;
122 }
123 
124 static bool unescape_hex(char **src, char **dst)
125 {
126 	char *p = *dst, *q = *src;
127 	int digit;
128 	u8 num;
129 
130 	if (*q++ != 'x')
131 		return false;
132 
133 	num = digit = hex_to_bin(*q++);
134 	if (digit < 0)
135 		return false;
136 
137 	digit = hex_to_bin(*q);
138 	if (digit >= 0) {
139 		q++;
140 		num = (num << 4) | digit;
141 	}
142 	*p = num;
143 	*dst += 1;
144 	*src = q;
145 	return true;
146 }
147 
/*
 * Decode one of the "special" escapes: double quote, backslash, 'a' (alert)
 * or 'e' (escape).
 *
 * *src points at the character after the backslash.  On a match the decoded
 * character is stored at *dst, both cursors advance by one and true is
 * returned.  Otherwise nothing is touched and false is returned.
 */
static bool unescape_special(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case '\"':
		decoded = '\"';
		break;
	case '\\':
		decoded = '\\';
		break;
	case 'a':
		decoded = '\a';
		break;
	case 'e':
		decoded = '\e';
		break;
	default:
		return false;
	}

	**dst = decoded;
	(*dst)++;
	(*src)++;
	return true;
}
172 
173 /**
174  * string_unescape - unquote characters in the given string
175  * @src:	source buffer (escaped)
176  * @dst:	destination buffer (unescaped)
177  * @size:	size of the destination buffer (0 to unlimit)
178  * @flags:	combination of the flags (bitwise OR):
179  *	%UNESCAPE_SPACE:
180  *		'\f' - form feed
181  *		'\n' - new line
182  *		'\r' - carriage return
183  *		'\t' - horizontal tab
184  *		'\v' - vertical tab
185  *	%UNESCAPE_OCTAL:
186  *		'\NNN' - byte with octal value NNN (1 to 3 digits)
187  *	%UNESCAPE_HEX:
188  *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
189  *	%UNESCAPE_SPECIAL:
190  *		'\"' - double quote
191  *		'\\' - backslash
192  *		'\a' - alert (BEL)
193  *		'\e' - escape
194  *	%UNESCAPE_ANY:
195  *		all previous together
196  *
197  * Description:
198  * The function unquotes characters in the given string.
199  *
200  * Because the size of the output will be the same as or less than the size of
201  * the input, the transformation may be performed in place.
202  *
203  * Caller must provide valid source and destination pointers. Be aware that
204  * destination buffer will always be NULL-terminated. Source string must be
205  * NULL-terminated as well.
206  *
207  * Return:
208  * The amount of the characters processed to the destination buffer excluding
209  * trailing '\0' is returned.
210  */
211 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
212 {
213 	char *out = dst;
214 
215 	while (*src && --size) {
216 		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
217 			src++;
218 			size--;
219 
220 			if (flags & UNESCAPE_SPACE &&
221 					unescape_space(&src, &out))
222 				continue;
223 
224 			if (flags & UNESCAPE_OCTAL &&
225 					unescape_octal(&src, &out))
226 				continue;
227 
228 			if (flags & UNESCAPE_HEX &&
229 					unescape_hex(&src, &out))
230 				continue;
231 
232 			if (flags & UNESCAPE_SPECIAL &&
233 					unescape_special(&src, &out))
234 				continue;
235 
236 			*out++ = '\\';
237 		}
238 		*out++ = *src++;
239 	}
240 	*out = '\0';
241 
242 	return out - dst;
243 }
244 EXPORT_SYMBOL(string_unescape);
245 
/*
 * Copy @c to the output unchanged.
 *
 * Needs one byte of room in *osz.  On success the byte is stored at *dst,
 * *dst advances by one, *osz shrinks by one and 1 is returned.  Returns
 * -ENOMEM, changing nothing, when no room is left.
 */
static int escape_passthrough(unsigned char c, char **dst, size_t *osz)
{
	if (!*osz)
		return -ENOMEM;

	*(*dst)++ = c;
	(*osz)--;

	return 1;
}
260 
/*
 * Escape a whitespace control character as backslash + letter.
 *
 * Handles '\n', '\r', '\t', '\v' and '\f'.  Returns 0, touching nothing,
 * when @c is not one of them, so the caller can try the next rule.
 * Returns -ENOMEM when @c does need escaping but fewer than two bytes of
 * room remain.  On success two bytes are stored at *dst, *dst advances by
 * two, *osz shrinks by two and 1 is returned.
 *
 * The room check is made only after @c has been classified, so a character
 * this rule does not handle is never rejected with -ENOMEM here.
 */
static int escape_space(unsigned char c, char **dst, size_t *osz)
{
	char *out = *dst;
	unsigned char to;

	switch (c) {
	case '\n':
		to = 'n';
		break;
	case '\r':
		to = 'r';
		break;
	case '\t':
		to = 't';
		break;
	case '\v':
		to = 'v';
		break;
	case '\f':
		to = 'f';
		break;
	default:
		return 0;
	}

	if (*osz < 2)
		return -ENOMEM;

	*out++ = '\\';
	*out++ = to;

	*dst = out;
	*osz -= 2;

	return 1;
}
297 
/*
 * Escape a "special" character as backslash + letter.
 *
 * Handles backslash, '\a' (alert) and '\e' (escape).  Returns 0, touching
 * nothing, when @c is not one of them, so the caller can try the next rule.
 * Returns -ENOMEM when @c does need escaping but fewer than two bytes of
 * room remain.  On success two bytes are stored at *dst, *dst advances by
 * two, *osz shrinks by two and 1 is returned.
 *
 * The room check is made only after @c has been classified, so a character
 * this rule does not handle is never rejected with -ENOMEM here.
 */
static int escape_special(unsigned char c, char **dst, size_t *osz)
{
	char *out = *dst;
	unsigned char to;

	switch (c) {
	case '\\':
		to = '\\';
		break;
	case '\a':
		to = 'a';
		break;
	case '\e':
		to = 'e';
		break;
	default:
		return 0;
	}

	if (*osz < 2)
		return -ENOMEM;

	*out++ = '\\';
	*out++ = to;

	*dst = out;
	*osz -= 2;

	return 1;
}
328 
/*
 * Escape a NUL byte as the two characters backslash and '0'.
 *
 * Returns 0, touching nothing, when @c is not NUL, so the caller can try
 * the next rule.  Returns -ENOMEM when @c is NUL but fewer than two bytes
 * of room remain.  On success two bytes are stored at *dst, *dst advances
 * by two, *osz shrinks by two and 1 is returned.
 *
 * The room check is made only after @c has been classified, so a non-NUL
 * byte is never rejected with -ENOMEM here.
 */
static int escape_null(unsigned char c, char **dst, size_t *osz)
{
	char *out = *dst;

	if (c)
		return 0;

	if (*osz < 2)
		return -ENOMEM;

	*out++ = '\\';
	*out++ = '0';

	*dst = out;
	*osz -= 2;

	return 1;
}
347 
/*
 * Escape @c as a backslash followed by exactly three octal digits.
 *
 * Applies to any byte value.  Needs four bytes of room; returns -ENOMEM,
 * changing nothing, when fewer remain.  On success four bytes are stored
 * at *dst, *dst advances by four, *osz shrinks by four and 1 is returned.
 */
static int escape_octal(unsigned char c, char **dst, size_t *osz)
{
	char *cur;
	int shift;

	if (*osz < 4)
		return -ENOMEM;

	cur = *dst;
	*cur++ = '\\';
	/* Most significant digit first: bits 7-6, then 5-3, then 2-0. */
	for (shift = 6; shift >= 0; shift -= 3)
		*cur++ = '0' + ((c >> shift) & 0x07);

	*dst = cur;
	*osz -= 4;

	return 1;
}
365 
/*
 * Escape @c as backslash, 'x' and two hexadecimal digits.
 *
 * The digits come from hex_asc_hi() and hex_asc_lo().  Applies to any byte
 * value.  Needs four bytes of room; returns -ENOMEM, changing nothing, when
 * fewer remain.  On success four bytes are stored at *dst, *dst advances by
 * four, *osz shrinks by four and 1 is returned.
 */
static int escape_hex(unsigned char c, char **dst, size_t *osz)
{
	char *cur = *dst;

	if (*osz < 4)
		return -ENOMEM;

	cur[0] = '\\';
	cur[1] = 'x';
	cur[2] = hex_asc_hi(c);
	cur[3] = hex_asc_lo(c);

	*dst = cur + 4;
	*osz -= 4;

	return 1;
}
383 
384 /**
385  * string_escape_mem - quote characters in the given memory buffer
386  * @src:	source buffer (unescaped)
387  * @isz:	source buffer size
388  * @dst:	destination buffer (escaped)
389  * @osz:	destination buffer size
390  * @flags:	combination of the flags (bitwise OR):
391  *	%ESCAPE_SPACE:
392  *		'\f' - form feed
393  *		'\n' - new line
394  *		'\r' - carriage return
395  *		'\t' - horizontal tab
396  *		'\v' - vertical tab
397  *	%ESCAPE_SPECIAL:
398  *		'\\' - backslash
399  *		'\a' - alert (BEL)
400  *		'\e' - escape
401  *	%ESCAPE_NULL:
402  *		'\0' - null
403  *	%ESCAPE_OCTAL:
404  *		'\NNN' - byte with octal value NNN (3 digits)
405  *	%ESCAPE_ANY:
406  *		all previous together
407  *	%ESCAPE_NP:
408  *		escape only non-printable characters (checked by isprint)
409  *	%ESCAPE_ANY_NP:
410  *		all previous together
411  *	%ESCAPE_HEX:
412  *		'\xHH' - byte with hexadecimal value HH (2 digits)
413  * @esc:	NULL-terminated string of characters any of which, if found in
414  *		the source, has to be escaped
415  *
416  * Description:
417  * The process of escaping byte buffer includes several parts. They are applied
418  * in the following sequence.
419  *	1. The character is matched to the printable class, if asked, and in
420  *	   case of match it passes through to the output.
421  *	2. The character is not matched to the one from @esc string and thus
422  *	   must go as is to the output.
423  *	3. The character is checked if it falls into the class given by @flags.
424  *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
425  *	   character. Note that they actually can't go together, otherwise
426  *	   %ESCAPE_HEX will be ignored.
427  *
428  * Caller must provide valid source and destination pointers. Be aware that
429  * destination buffer will not be NULL-terminated, thus caller have to append
430  * it if needs.
431  *
432  * Return:
433  * The amount of the characters processed to the destination buffer, or
434  * %-ENOMEM if the size of buffer is not enough to put an escaped character is
435  * returned.
436  *
437  * Even in the case of error @dst pointer will be updated to point to the byte
438  * after the last processed character.
439  */
440 int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
441 		      unsigned int flags, const char *esc)
442 {
443 	char *out = *dst, *p = out;
444 	bool is_dict = esc && *esc;
445 	int ret = 0;
446 
447 	while (isz--) {
448 		unsigned char c = *src++;
449 
450 		/*
451 		 * Apply rules in the following sequence:
452 		 *	- the character is printable, when @flags has
453 		 *	  %ESCAPE_NP bit set
454 		 *	- the @esc string is supplied and does not contain a
455 		 *	  character under question
456 		 *	- the character doesn't fall into a class of symbols
457 		 *	  defined by given @flags
458 		 * In these cases we just pass through a character to the
459 		 * output buffer.
460 		 */
461 		if ((flags & ESCAPE_NP && isprint(c)) ||
462 		    (is_dict && !strchr(esc, c))) {
463 			/* do nothing */
464 		} else {
465 			if (flags & ESCAPE_SPACE) {
466 				ret = escape_space(c, &p, &osz);
467 				if (ret < 0)
468 					break;
469 				if (ret > 0)
470 					continue;
471 			}
472 
473 			if (flags & ESCAPE_SPECIAL) {
474 				ret = escape_special(c, &p, &osz);
475 				if (ret < 0)
476 					break;
477 				if (ret > 0)
478 					continue;
479 			}
480 
481 			if (flags & ESCAPE_NULL) {
482 				ret = escape_null(c, &p, &osz);
483 				if (ret < 0)
484 					break;
485 				if (ret > 0)
486 					continue;
487 			}
488 
489 			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
490 			if (flags & ESCAPE_OCTAL) {
491 				ret = escape_octal(c, &p, &osz);
492 				if (ret < 0)
493 					break;
494 				continue;
495 			}
496 			if (flags & ESCAPE_HEX) {
497 				ret = escape_hex(c, &p, &osz);
498 				if (ret < 0)
499 					break;
500 				continue;
501 			}
502 		}
503 
504 		ret = escape_passthrough(c, &p, &osz);
505 		if (ret < 0)
506 			break;
507 	}
508 
509 	*dst = p;
510 
511 	if (ret < 0)
512 		return ret;
513 
514 	return p - out;
515 }
516 EXPORT_SYMBOL(string_escape_mem);
517