xref: /linux/lib/string_helpers.c (revision 9d796e66230205cd3366f5660387bd9ecca9d336)
1 /*
2  * Helpers for formatting and printing strings
3  *
4  * Copyright 31 August 2008 James Bottomley
5  * Copyright (C) 2013, Intel Corporation
6  */
7 #include <linux/kernel.h>
8 #include <linux/math64.h>
9 #include <linux/export.h>
10 #include <linux/ctype.h>
11 #include <linux/errno.h>
12 #include <linux/string.h>
13 #include <linux/string_helpers.h>
14 
15 /**
16  * string_get_size - get the size in the specified units
17  * @size:	The size to be converted
18  * @units:	units to use (powers of 1000 or 1024)
19  * @buf:	buffer to format to
20  * @len:	length of buffer
21  *
22  * This function returns a string formatted to 3 significant figures
23  * giving the size in the required units.  @buf should have room for
24  * at least 9 bytes and will always be zero terminated.
25  *
26  */
27 void string_get_size(u64 size, const enum string_size_units units,
28 		     char *buf, int len)
29 {
30 	static const char *const units_10[] = {
31 		"B", "kB", "MB", "GB", "TB", "PB", "EB"
32 	};
33 	static const char *const units_2[] = {
34 		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"
35 	};
36 	static const char *const *const units_str[] = {
37 		[STRING_UNITS_10] = units_10,
38 		[STRING_UNITS_2] = units_2,
39 	};
40 	static const unsigned int divisor[] = {
41 		[STRING_UNITS_10] = 1000,
42 		[STRING_UNITS_2] = 1024,
43 	};
44 	int i, j;
45 	u32 remainder = 0, sf_cap;
46 	char tmp[8];
47 
48 	tmp[0] = '\0';
49 	i = 0;
50 	if (size >= divisor[units]) {
51 		while (size >= divisor[units]) {
52 			remainder = do_div(size, divisor[units]);
53 			i++;
54 		}
55 
56 		sf_cap = size;
57 		for (j = 0; sf_cap*10 < 1000; j++)
58 			sf_cap *= 10;
59 
60 		if (j) {
61 			remainder *= 1000;
62 			remainder /= divisor[units];
63 			snprintf(tmp, sizeof(tmp), ".%03u", remainder);
64 			tmp[j+1] = '\0';
65 		}
66 	}
67 
68 	snprintf(buf, len, "%u%s %s", (u32)size,
69 		 tmp, units_str[units][i]);
70 }
71 EXPORT_SYMBOL(string_get_size);
72 
/*
 * Decode a whitespace escape letter (n, r, t, v or f) found at *src.
 *
 * On a match the corresponding control character is stored at *dst and both
 * cursors advance by one.  Otherwise nothing is modified and false is
 * returned.
 */
static bool unescape_space(char **src, char **dst)
{
	static const char letters[] = { 'n', 'r', 't', 'v', 'f' };
	static const char controls[] = { '\n', '\r', '\t', '\v', '\f' };
	size_t i;

	for (i = 0; i < sizeof(letters); i++) {
		if (**src != letters[i])
			continue;

		*(*dst)++ = controls[i];
		++*src;
		return true;
	}

	return false;
}
100 
101 static bool unescape_octal(char **src, char **dst)
102 {
103 	char *p = *dst, *q = *src;
104 	u8 num;
105 
106 	if (isodigit(*q) == 0)
107 		return false;
108 
109 	num = (*q++) & 7;
110 	while (num < 32 && isodigit(*q) && (q - *src < 3)) {
111 		num <<= 3;
112 		num += (*q++) & 7;
113 	}
114 	*p = num;
115 	*dst += 1;
116 	*src = q;
117 	return true;
118 }
119 
120 static bool unescape_hex(char **src, char **dst)
121 {
122 	char *p = *dst, *q = *src;
123 	int digit;
124 	u8 num;
125 
126 	if (*q++ != 'x')
127 		return false;
128 
129 	num = digit = hex_to_bin(*q++);
130 	if (digit < 0)
131 		return false;
132 
133 	digit = hex_to_bin(*q);
134 	if (digit >= 0) {
135 		q++;
136 		num = (num << 4) | digit;
137 	}
138 	*p = num;
139 	*dst += 1;
140 	*src = q;
141 	return true;
142 }
143 
/*
 * Decode one of the special escapes found at *src: double quote, backslash,
 * 'a' (alert) or 'e' (escape).
 *
 * On a match the decoded character is stored at *dst and both cursors
 * advance by one.  Otherwise nothing is modified and false is returned.
 */
static bool unescape_special(char **src, char **dst)
{
	char decoded;

	switch (**src) {
	case '\"':
	case '\\':
		/* These two stand for themselves. */
		decoded = **src;
		break;
	case 'a':
		decoded = '\a';
		break;
	case 'e':
		decoded = '\e';
		break;
	default:
		return false;
	}

	*(*dst)++ = decoded;
	++*src;
	return true;
}
168 
169 /**
170  * string_unescape - unquote characters in the given string
171  * @src:	source buffer (escaped)
172  * @dst:	destination buffer (unescaped)
173  * @size:	size of the destination buffer (0 to unlimit)
174  * @flags:	combination of the flags (bitwise OR):
175  *	%UNESCAPE_SPACE:
176  *		'\f' - form feed
177  *		'\n' - new line
178  *		'\r' - carriage return
179  *		'\t' - horizontal tab
180  *		'\v' - vertical tab
181  *	%UNESCAPE_OCTAL:
182  *		'\NNN' - byte with octal value NNN (1 to 3 digits)
183  *	%UNESCAPE_HEX:
184  *		'\xHH' - byte with hexadecimal value HH (1 to 2 digits)
185  *	%UNESCAPE_SPECIAL:
186  *		'\"' - double quote
187  *		'\\' - backslash
188  *		'\a' - alert (BEL)
189  *		'\e' - escape
190  *	%UNESCAPE_ANY:
191  *		all previous together
192  *
193  * Description:
194  * The function unquotes characters in the given string.
195  *
196  * Because the size of the output will be the same as or less than the size of
197  * the input, the transformation may be performed in place.
198  *
199  * Caller must provide valid source and destination pointers. Be aware that
200  * destination buffer will always be NULL-terminated. Source string must be
201  * NULL-terminated as well.
202  *
203  * Return:
204  * The amount of the characters processed to the destination buffer excluding
205  * trailing '\0' is returned.
206  */
207 int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
208 {
209 	char *out = dst;
210 
211 	while (*src && --size) {
212 		if (src[0] == '\\' && src[1] != '\0' && size > 1) {
213 			src++;
214 			size--;
215 
216 			if (flags & UNESCAPE_SPACE &&
217 					unescape_space(&src, &out))
218 				continue;
219 
220 			if (flags & UNESCAPE_OCTAL &&
221 					unescape_octal(&src, &out))
222 				continue;
223 
224 			if (flags & UNESCAPE_HEX &&
225 					unescape_hex(&src, &out))
226 				continue;
227 
228 			if (flags & UNESCAPE_SPECIAL &&
229 					unescape_special(&src, &out))
230 				continue;
231 
232 			*out++ = '\\';
233 		}
234 		*out++ = *src++;
235 	}
236 	*out = '\0';
237 
238 	return out - dst;
239 }
240 EXPORT_SYMBOL(string_unescape);
241 
/*
 * Emit @c unchanged.
 *
 * The byte is stored only while the cursor is below @end; the cursor
 * advances by one either way, so the caller can tell how much room the
 * complete output would have needed.  Always returns true.
 */
static bool escape_passthrough(unsigned char c, char **dst, char *end)
{
	if (*dst < end)
		**dst = c;
	++*dst;
	return true;
}
251 
/*
 * Emit a whitespace control character (\n, \r, \t, \v or \f) as a backslash
 * followed by its escape letter.
 *
 * Returns false, leaving *dst alone, when @c is not one of these characters.
 * Otherwise *dst advances by two; each output byte is stored only if its
 * position lies below @end.
 */
static bool escape_space(unsigned char c, char **dst, char *end)
{
	static const char controls[] = { '\n', '\r', '\t', '\v', '\f' };
	static const char letters[] = { 'n', 'r', 't', 'v', 'f' };
	char *wr = *dst;
	size_t i;

	for (i = 0; i < sizeof(controls); i++) {
		if (c == controls[i])
			break;
	}
	if (i == sizeof(controls))
		return false;

	if (wr < end)
		wr[0] = '\\';
	if (wr + 1 < end)
		wr[1] = letters[i];

	*dst = wr + 2;
	return true;
}
287 
/*
 * Emit a backslash, alert or escape character as a two-byte backslash
 * sequence ("\\", "\a" or "\e" respectively).
 *
 * Returns false, leaving *dst alone, for any other character.  Otherwise
 * *dst advances by two; each output byte is stored only if its position
 * lies below @end.
 */
static bool escape_special(unsigned char c, char **dst, char *end)
{
	char *wr = *dst;
	unsigned char symbol;

	if (c == '\\')
		symbol = '\\';
	else if (c == '\a')
		symbol = 'a';
	else if (c == '\e')
		symbol = 'e';
	else
		return false;

	if (wr < end)
		wr[0] = '\\';
	if (wr + 1 < end)
		wr[1] = symbol;

	*dst = wr + 2;
	return true;
}
317 
/*
 * Emit a zero byte as the two characters '\' and '0'.
 *
 * Returns false, leaving *dst alone, when @c is non-zero.  Otherwise *dst
 * advances by two; each output byte is stored only if its position lies
 * below @end.
 */
static bool escape_null(unsigned char c, char **dst, char *end)
{
	static const char seq[] = { '\\', '0' };
	char *wr = *dst;
	size_t i;

	if (c != 0)
		return false;

	for (i = 0; i < sizeof(seq); i++) {
		if (wr < end)
			*wr = seq[i];
		wr++;
	}

	*dst = wr;
	return true;
}
335 
/*
 * Emit @c as a backslash followed by exactly three octal digits.
 *
 * *dst advances by four; each output byte is stored only if its position
 * lies below @end.  Always returns true.
 */
static bool escape_octal(unsigned char c, char **dst, char *end)
{
	char *wr = *dst;
	int shift;

	if (wr < end)
		*wr = '\\';
	wr++;

	/* Most significant digit first: bits 7-6, then 5-3, then 2-0. */
	for (shift = 6; shift >= 0; shift -= 3) {
		if (wr < end)
			*wr = '0' + ((c >> shift) & 0x07);
		wr++;
	}

	*dst = wr;
	return true;
}
356 
/*
 * Emit @c as a backslash, an 'x' and two hexadecimal digits taken from
 * hex_asc_hi() and hex_asc_lo().
 *
 * *dst advances by four; each output byte is stored only if its position
 * lies below @end.  Always returns true.
 */
static bool escape_hex(unsigned char c, char **dst, char *end)
{
	char *wr = *dst;

	if (wr < end)
		wr[0] = '\\';
	if (wr + 1 < end)
		wr[1] = 'x';
	if (wr + 2 < end)
		wr[2] = hex_asc_hi(c);
	if (wr + 3 < end)
		wr[3] = hex_asc_lo(c);

	*dst = wr + 4;
	return true;
}
377 
378 /**
379  * string_escape_mem - quote characters in the given memory buffer
380  * @src:	source buffer (unescaped)
381  * @isz:	source buffer size
382  * @dst:	destination buffer (escaped)
383  * @osz:	destination buffer size
384  * @flags:	combination of the flags (bitwise OR):
385  *	%ESCAPE_SPACE:
386  *		'\f' - form feed
387  *		'\n' - new line
388  *		'\r' - carriage return
389  *		'\t' - horizontal tab
390  *		'\v' - vertical tab
391  *	%ESCAPE_SPECIAL:
392  *		'\\' - backslash
393  *		'\a' - alert (BEL)
394  *		'\e' - escape
395  *	%ESCAPE_NULL:
396  *		'\0' - null
397  *	%ESCAPE_OCTAL:
398  *		'\NNN' - byte with octal value NNN (3 digits)
399  *	%ESCAPE_ANY:
400  *		all previous together
401  *	%ESCAPE_NP:
402  *		escape only non-printable characters (checked by isprint)
403  *	%ESCAPE_ANY_NP:
404  *		all previous together
405  *	%ESCAPE_HEX:
406  *		'\xHH' - byte with hexadecimal value HH (2 digits)
407  * @esc:	NULL-terminated string of characters any of which, if found in
408  *		the source, has to be escaped
409  *
410  * Description:
411  * The process of escaping byte buffer includes several parts. They are applied
412  * in the following sequence.
413  *	1. The character is matched to the printable class, if asked, and in
414  *	   case of match it passes through to the output.
415  *	2. The character is not matched to the one from @esc string and thus
416  *	   must go as is to the output.
417  *	3. The character is checked if it falls into the class given by @flags.
418  *	   %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
419  *	   character. Note that they actually can't go together, otherwise
420  *	   %ESCAPE_HEX will be ignored.
421  *
422  * Caller must provide valid source and destination pointers. Be aware that
423  * destination buffer will not be NULL-terminated, thus caller have to append
424  * it if needs.
425  *
426  * Return:
427  * The total size of the escaped output that would be generated for
428  * the given input and flags. To check whether the output was
429  * truncated, compare the return value to osz. There is room left in
430  * dst for a '\0' terminator if and only if ret < osz.
431  */
432 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
433 		      unsigned int flags, const char *esc)
434 {
435 	char *p = dst;
436 	char *end = p + osz;
437 	bool is_dict = esc && *esc;
438 
439 	while (isz--) {
440 		unsigned char c = *src++;
441 
442 		/*
443 		 * Apply rules in the following sequence:
444 		 *	- the character is printable, when @flags has
445 		 *	  %ESCAPE_NP bit set
446 		 *	- the @esc string is supplied and does not contain a
447 		 *	  character under question
448 		 *	- the character doesn't fall into a class of symbols
449 		 *	  defined by given @flags
450 		 * In these cases we just pass through a character to the
451 		 * output buffer.
452 		 */
453 		if ((flags & ESCAPE_NP && isprint(c)) ||
454 		    (is_dict && !strchr(esc, c))) {
455 			/* do nothing */
456 		} else {
457 			if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
458 				continue;
459 
460 			if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
461 				continue;
462 
463 			if (flags & ESCAPE_NULL && escape_null(c, &p, end))
464 				continue;
465 
466 			/* ESCAPE_OCTAL and ESCAPE_HEX always go last */
467 			if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
468 				continue;
469 
470 			if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
471 				continue;
472 		}
473 
474 		escape_passthrough(c, &p, end);
475 	}
476 
477 	return p - dst;
478 }
479 EXPORT_SYMBOL(string_escape_mem);
480