xref: /linux/lib/string_helpers.c (revision e0bf6c5ca2d3281f231c5f0c9bf145e9513644de)
1 /*
2  * Helpers for formatting and printing strings
3  *
4  * Copyright 31 August 2008 James Bottomley
5  * Copyright (C) 2013, Intel Corporation
6  */
7 #include <linux/kernel.h>
8 #include <linux/math64.h>
9 #include <linux/export.h>
10 #include <linux/ctype.h>
11 #include <linux/errno.h>
12 #include <linux/string.h>
13 #include <linux/string_helpers.h>
14 
15 /**
16  * string_get_size - get the size in the specified units
17  * @size:	The size to be converted
18  * @units:	units to use (powers of 1000 or 1024)
19  * @buf:	buffer to format to
20  * @len:	length of buffer
21  *
22  * This function returns a string formatted to 3 significant figures
23  * giving the size in the required units.  @buf should have room for
24  * at least 9 bytes and will always be zero terminated.
25  *
26  */
27 void string_get_size(u64 size, const enum string_size_units units,
28 		     char *buf, int len)
29 {
30 	static const char *const units_10[] = {
31 		"B", "kB", "MB", "GB", "TB", "PB", "EB"
32 	};
33 	static const char *const units_2[] = {
34 		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"
35 	};
36 	static const char *const *const units_str[] = {
37 		[STRING_UNITS_10] = units_10,
38 		[STRING_UNITS_2] = units_2,
39 	};
40 	static const unsigned int divisor[] = {
41 		[STRING_UNITS_10] = 1000,
42 		[STRING_UNITS_2] = 1024,
43 	};
44 	int i, j;
45 	u32 remainder = 0, sf_cap;
46 	char tmp[8];
47 
48 	tmp[0] = '\0';
49 	i = 0;
50 	if (size >= divisor[units]) {
51 		while (size >= divisor[units]) {
52 			remainder = do_div(size, divisor[units]);
53 			i++;
54 		}
55 
56 		sf_cap = size;
57 		for (j = 0; sf_cap*10 < 1000; j++)
58 			sf_cap *= 10;
59 
60 		if (j) {
61 			remainder *= 1000;
62 			remainder /= divisor[units];
63 			snprintf(tmp, sizeof(tmp), ".%03u", remainder);
64 			tmp[j+1] = '\0';
65 		}
66 	}
67 
68 	snprintf(buf, len, "%u%s %s", (u32)size,
69 		 tmp, units_str[units][i]);
70 }
71 EXPORT_SYMBOL(string_get_size);
72 
/*
 * Decode the whitespace escape letter at **@src ('n', 'r', 't', 'v' or 'f').
 *
 * On a match the decoded control character is stored at **@dst, both cursors
 * advance by one and true is returned.  Otherwise nothing is touched and
 * false is returned.
 */
static bool unescape_space(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case 'n':
		decoded = '\n';
		break;
	case 'r':
		decoded = '\r';
		break;
	case 't':
		decoded = '\t';
		break;
	case 'v':
		decoded = '\v';
		break;
	case 'f':
		decoded = '\f';
		break;
	default:
		return false;
	}

	**dst = decoded;
	++*dst;
	++*src;
	return true;
}
100 
101 static bool unescape_octal(char **src, char **dst)
102 {
103 	char *p = *dst, *q = *src;
104 	u8 num;
105 
106 	if (isodigit(*q) == 0)
107 		return false;
108 
109 	num = (*q++) & 7;
110 	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
111 		num <<= 3;
112 		num += (*q++) & 7;
113 	}
114 	*p = num;
115 	*dst += 1;
116 	*src = q;
117 	return true;
118 }
119 
120 static bool unescape_hex(char **src, char **dst)
121 {
122 	char *p = *dst, *q = *src;
123 	int digit;
124 	u8 num;
125 
126 	if (*q++ != 'x')
127 		return false;
128 
129 	num = digit = hex_to_bin(*q++);
130 	if (digit < 0)
131 		return false;
132 
133 	digit = hex_to_bin(*q);
134 	if (digit >= 0) {
135 		q++;
136 		num = (num << 4) | digit;
137 	}
138 	*p = num;
139 	*dst += 1;
140 	*src = q;
141 	return true;
142 }
143 
/*
 * Decode one of the special escapes at **@src: '"', '\\', 'a' (alert) or
 * 'e' (escape).
 *
 * On a match the decoded character is stored at **@dst, both cursors advance
 * by one and true is returned.  Otherwise nothing is touched and false is
 * returned.
 */
static bool unescape_special(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case '\"':
		decoded = '\"';
		break;
	case '\\':
		decoded = '\\';
		break;
	case 'a':
		decoded = '\a';
		break;
	case 'e':
		decoded = '\e';
		break;
	default:
		return false;
	}

	**dst = decoded;
	++*dst;
	++*src;
	return true;
}
168 
169 /**
170  * string_unescape - unquote characters in the given string
171  * @src:	source buffer (escaped)
172  * @dst:	destination buffer (unescaped)
173  * @size:	size of the destination buffer (0 to unlimit)
174  * @flags:	combination of the flags (bitwise OR):
175  *	%UNESCAPE_SPACE:
176  *		'\f' - form feed
177  *		'\n' - new line
178  *		'\r' - carriage return
179  *		'\t' - horizontal tab
180  *		'\v' - vertical tab
181  *	%UNESCAPE_OCTAL:
182  *		'\NNN' - byte with octal value NNN (1 to 3 digits)
183  *	%UNESCAPE_HEX:
184  *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
185  *	%UNESCAPE_SPECIAL:
186  *		'\"' - double quote
187  *		'\\' - backslash
188  *		'\a' - alert (BEL)
189  *		'\e' - escape
190  *	%UNESCAPE_ANY:
191  *		all previous together
192  *
193  * Description:
194  * The function unquotes characters in the given string.
195  *
196  * Because the size of the output will be the same as or less than the size of
197  * the input, the transformation may be performed in place.
198  *
199  * Caller must provide valid source and destination pointers. Be aware that
200  * destination buffer will always be NULL-terminated. Source string must be
201  * NULL-terminated as well.
202  *
203  * Return:
204  * The amount of the characters processed to the destination buffer excluding
205  * trailing '\0' is returned.
206  */
207 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
208 {
209 	char *out = dst;
210 
211 	while (*src && --size) {
212 		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
213 			src++;
214 			size--;
215 
216 			if (flags & UNESCAPE_SPACE &&
217 					unescape_space(&src, &out))
218 				continue;
219 
220 			if (flags & UNESCAPE_OCTAL &&
221 					unescape_octal(&src, &out))
222 				continue;
223 
224 			if (flags & UNESCAPE_HEX &&
225 					unescape_hex(&src, &out))
226 				continue;
227 
228 			if (flags & UNESCAPE_SPECIAL &&
229 					unescape_special(&src, &out))
230 				continue;
231 
232 			*out++ = '\\';
233 		}
234 		*out++ = *src++;
235 	}
236 	*out = '\0';
237 
238 	return out - dst;
239 }
240 EXPORT_SYMBOL(string_unescape);
241 
/*
 * Copy @c unchanged to the output.
 *
 * @dst is the output cursor and @osz the number of bytes left there; both
 * are updated after a successful write.  Returns 1 on success or -ENOMEM
 * when no room is left, in which case nothing is modified.
 */
static int escape_passthrough(unsigned char c, char **dst, size_t *osz)
{
	if (!*osz)
		return -ENOMEM;

	**dst = c;
	++*dst;
	--*osz;

	return 1;
}
256 
/*
 * Escape a whitespace control character as a two byte backslash sequence.
 *
 * @c:   byte to examine
 * @dst: in/out cursor into the output buffer, advanced past what was written
 * @osz: in/out number of bytes left at *@dst, reduced by what was written
 *
 * Returns 1 when @c is one of '\n', '\r', '\t', '\v', '\f' and its escape
 * was written, 0 when @c does not belong to this class (nothing is written),
 * or -ENOMEM when @c belongs to the class but fewer than two bytes are left.
 */
static int escape_space(unsigned char c, char **dst, size_t *osz)
{
	char *out = *dst;
	unsigned char to;

	switch (c) {
	case '\n':
		to = 'n';
		break;
	case '\r':
		to = 'r';
		break;
	case '\t':
		to = 't';
		break;
	case '\v':
		to = 'v';
		break;
	case '\f':
		to = 'f';
		break;
	default:
		return 0;
	}

	/*
	 * Check the room only once we know something will be written, so
	 * that a byte outside this class is never rejected for lack of
	 * space it does not need.
	 */
	if (*osz < 2)
		return -ENOMEM;

	*out++ = '\\';
	*out++ = to;

	*dst = out;
	*osz -= 2;

	return 1;
}
293 
/*
 * Escape a backslash, alert (BEL) or escape (ESC) byte as a two byte
 * backslash sequence.
 *
 * @c:   byte to examine
 * @dst: in/out cursor into the output buffer, advanced past what was written
 * @osz: in/out number of bytes left at *@dst, reduced by what was written
 *
 * Returns 1 when @c is one of '\\', '\a', '\e' and its escape was written,
 * 0 when @c does not belong to this class (nothing is written), or -ENOMEM
 * when @c belongs to the class but fewer than two bytes are left.
 */
static int escape_special(unsigned char c, char **dst, size_t *osz)
{
	char *out = *dst;
	unsigned char to;

	switch (c) {
	case '\\':
		to = '\\';
		break;
	case '\a':
		to = 'a';
		break;
	case '\e':
		to = 'e';
		break;
	default:
		return 0;
	}

	/*
	 * Check the room only once we know something will be written, so
	 * that a byte outside this class is never rejected for lack of
	 * space it does not need.
	 */
	if (*osz < 2)
		return -ENOMEM;

	*out++ = '\\';
	*out++ = to;

	*dst = out;
	*osz -= 2;

	return 1;
}
324 
/*
 * Escape a zero byte as the two byte sequence "\0".
 *
 * @c:   byte to examine
 * @dst: in/out cursor into the output buffer, advanced past what was written
 * @osz: in/out number of bytes left at *@dst, reduced by what was written
 *
 * Returns 1 when @c is zero and its escape was written, 0 when @c is
 * non-zero (nothing is written), or -ENOMEM when @c is zero but fewer than
 * two bytes are left.
 */
static int escape_null(unsigned char c, char **dst, size_t *osz)
{
	char *out = *dst;

	/* Not ours: report "no match" regardless of the remaining room */
	if (c)
		return 0;

	if (*osz < 2)
		return -ENOMEM;

	*out++ = '\\';
	*out++ = '0';

	*dst = out;
	*osz -= 2;

	return 1;
}
343 
/*
 * Write @c as a backslash followed by exactly three octal digits.
 *
 * @dst is the output cursor and @osz the number of bytes left there; both
 * are updated after a successful write.  Returns 1 on success or -ENOMEM
 * when fewer than four bytes are left, in which case nothing is modified.
 */
static int escape_octal(unsigned char c, char **dst, size_t *osz)
{
	char *cur = *dst;
	int shift;

	if (*osz < 4)
		return -ENOMEM;

	*cur++ = '\\';
	/* Most significant octal digit first: bits 7-6, 5-3, 2-0 */
	for (shift = 6; shift >= 0; shift -= 3)
		*cur++ = ((c >> shift) & 0x07) + '0';

	*dst = cur;
	*osz -= 4;

	return 1;
}
361 
/*
 * Write @c as "\x" followed by exactly two hexadecimal digits.
 *
 * @dst is the output cursor and @osz the number of bytes left there; both
 * are updated after a successful write.  Returns 1 on success or -ENOMEM
 * when fewer than four bytes are left, in which case nothing is modified.
 */
static int escape_hex(unsigned char c, char **dst, size_t *osz)
{
	char *cur = *dst;

	if (*osz < 4)
		return -ENOMEM;

	cur[0] = '\\';
	cur[1] = 'x';
	cur[2] = hex_asc_hi(c);
	cur[3] = hex_asc_lo(c);

	*dst = cur + 4;
	*osz -= 4;

	return 1;
}
379 
380 /**
381  * string_escape_mem - quote characters in the given memory buffer
382  * @src:	source buffer (unescaped)
383  * @isz:	source buffer size
384  * @dst:	destination buffer (escaped)
385  * @osz:	destination buffer size
386  * @flags:	combination of the flags (bitwise OR):
387  *	%ESCAPE_SPACE:
388  *		'\f' - form feed
389  *		'\n' - new line
390  *		'\r' - carriage return
391  *		'\t' - horizontal tab
392  *		'\v' - vertical tab
393  *	%ESCAPE_SPECIAL:
394  *		'\\' - backslash
395  *		'\a' - alert (BEL)
396  *		'\e' - escape
397  *	%ESCAPE_NULL:
398  *		'\0' - null
399  *	%ESCAPE_OCTAL:
400  *		'\NNN' - byte with octal value NNN (3 digits)
401  *	%ESCAPE_ANY:
402  *		all previous together
403  *	%ESCAPE_NP:
404  *		escape only non-printable characters (checked by isprint)
405  *	%ESCAPE_ANY_NP:
406  *		all previous together
407  *	%ESCAPE_HEX:
408  *		'\xHH' - byte with hexadecimal value HH (2 digits)
409  * @esc:	NULL-terminated string of characters any of which, if found in
410  *		the source, has to be escaped
411  *
412  * Description:
413  * The process of escaping byte buffer includes several parts. They are applied
414  * in the following sequence.
415  *	1. The character is matched to the printable class, if asked, and in
416  *	   case of match it passes through to the output.
417  *	2. The character is not matched to the one from @esc string and thus
418  *	   must go as is to the output.
419  *	3. The character is checked if it falls into the class given by @flags.
420  *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
421  *	   character. Note that they actually can't go together, otherwise
422  *	   %ESCAPE_HEX will be ignored.
423  *
424  * Caller must provide valid source and destination pointers. Be aware that
425  * destination buffer will not be NULL-terminated, thus caller have to append
426  * it if needs.
427  *
428  * Return:
429  * The amount of the characters processed to the destination buffer, or
430  * %-ENOMEM if the size of buffer is not enough to put an escaped character is
431  * returned.
432  *
433  * Even in the case of error @dst pointer will be updated to point to the byte
434  * after the last processed character.
435  */
436 int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz,
437 		      unsigned int flags, const char *esc)
438 {
439 	char *out = *dst, *p = out;
440 	bool is_dict = esc && *esc;
441 	int ret = 0;
442 
443 	while (isz--) {
444 		unsigned char c = *src++;
445 
446 		/*
447 		 * Apply rules in the following sequence:
448 		 *	- the character is printable, when @flags has
449 		 *	  %ESCAPE_NP bit set
450 		 *	- the @esc string is supplied and does not contain a
451 		 *	  character under question
452 		 *	- the character doesn't fall into a class of symbols
453 		 *	  defined by given @flags
454 		 * In these cases we just pass through a character to the
455 		 * output buffer.
456 		 */
457 		if ((flags & ESCAPE_NP && isprint(c)) ||
458 		    (is_dict && !strchr(esc, c))) {
459 			/* do nothing */
460 		} else {
461 			if (flags & ESCAPE_SPACE) {
462 				ret = escape_space(c, &p, &osz);
463 				if (ret < 0)
464 					break;
465 				if (ret > 0)
466 					continue;
467 			}
468 
469 			if (flags & ESCAPE_SPECIAL) {
470 				ret = escape_special(c, &p, &osz);
471 				if (ret < 0)
472 					break;
473 				if (ret > 0)
474 					continue;
475 			}
476 
477 			if (flags & ESCAPE_NULL) {
478 				ret = escape_null(c, &p, &osz);
479 				if (ret < 0)
480 					break;
481 				if (ret > 0)
482 					continue;
483 			}
484 
485 			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
486 			if (flags & ESCAPE_OCTAL) {
487 				ret = escape_octal(c, &p, &osz);
488 				if (ret < 0)
489 					break;
490 				continue;
491 			}
492 			if (flags & ESCAPE_HEX) {
493 				ret = escape_hex(c, &p, &osz);
494 				if (ret < 0)
495 					break;
496 				continue;
497 			}
498 		}
499 
500 		ret = escape_passthrough(c, &p, &osz);
501 		if (ret < 0)
502 			break;
503 	}
504 
505 	*dst = p;
506 
507 	if (ret < 0)
508 		return ret;
509 
510 	return p - out;
511 }
512 EXPORT_SYMBOL(string_escape_mem);
513