xref: /illumos-gate/usr/src/tools/smatch/src/char.c (revision a61ed2ce7a86a4d6428f2a83eb4739fae945447e)
1 #include <string.h>
2 #include "target.h"
3 #include "lib.h"
4 #include "allocate.h"
5 #include "token.h"
6 #include "expression.h"
7 #include "char.h"
8 
9 static const char *parse_escape(const char *p, unsigned *val, const char *end, int bits, struct position pos)
10 {
11 	unsigned c = *p++;
12 	unsigned d;
13 	if (c != '\\') {
14 		*val = c;
15 		return p;
16 	}
17 
18 	c = *p++;
19 	switch (c) {
20 	case 'a': c = '\a'; break;
21 	case 'b': c = '\b'; break;
22 	case 't': c = '\t'; break;
23 	case 'n': c = '\n'; break;
24 	case 'v': c = '\v'; break;
25 	case 'f': c = '\f'; break;
26 	case 'r': c = '\r'; break;
27 	case 'e': c = '\e'; break;
28 	case 'x': {
29 		unsigned mask = -(1U << (bits - 4));
30 		for (c = 0; p < end; c = (c << 4) + d) {
31 			d = hexval(*p);
32 			if (d > 16)
33 				break;
34 			p++;
35 			if (c & mask) {
36 				warning(pos,
37 					"hex escape sequence out of range");
38 				mask = 0;
39 			}
40 		}
41 		break;
42 	}
43 	case '0'...'7': {
44 		if (p + 2 < end)
45 			end = p + 2;
46 		c -= '0';
47 		while (p < end && (d = *p - '0') < 8) {
48 			c = (c << 3) + d;
49 			p++;
50 		}
51 		if ((c & 0400) && bits < 9)
52 			warning(pos,
53 				"octal escape sequence out of range");
54 		break;
55 	}
56 	default:	/* everything else is left as is */
57 		warning(pos, "unknown escape sequence: '\\%c'", c);
58 		break;
59 	case '\\':
60 	case '\'':
61 	case '"':
62 	case '?':
63 		break;	/* those are legal, so no warnings */
64 	}
65 	*val = c & ~((~0U << (bits - 1)) << 1);
66 	return p;
67 }
68 
69 void get_char_constant(struct token *token, unsigned long long *val)
70 {
71 	const char *p = token->embedded, *end;
72 	unsigned v;
73 	int type = token_type(token);
74 	switch (type) {
75 	case TOKEN_CHAR:
76 	case TOKEN_WIDE_CHAR:
77 		p = token->string->data;
78 		end = p + token->string->length - 1;
79 		break;
80 	case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3:
81 		end = p + type - TOKEN_CHAR;
82 		break;
83 	default:
84 		end = p + type - TOKEN_WIDE_CHAR;
85 	}
86 	p = parse_escape(p, &v, end,
87 			type < TOKEN_WIDE_CHAR ? bits_in_char : wchar_ctype->bit_size, token->pos);
88 	if (p != end)
89 		warning(token->pos,
90 			"multi-character character constant");
91 	*val = v;
92 }
93 
94 struct token *get_string_constant(struct token *token, struct expression *expr)
95 {
96 	struct string *string = token->string;
97 	struct token *next = token->next, *done = NULL;
98 	int stringtype = token_type(token);
99 	int is_wide = stringtype == TOKEN_WIDE_STRING;
100 	static char buffer[MAX_STRING];
101 	int len = 0;
102 	int bits;
103 	int esc_count = 0;
104 
105 	while (!done) {
106 		switch (token_type(next)) {
107 		case TOKEN_WIDE_STRING:
108 			is_wide = 1;
109 		case TOKEN_STRING:
110 			next = next->next;
111 			break;
112 		default:
113 			done = next;
114 		}
115 	}
116 	bits = is_wide ? wchar_ctype->bit_size: bits_in_char;
117 	while (token != done) {
118 		unsigned v;
119 		const char *p = token->string->data;
120 		const char *end = p + token->string->length - 1;
121 		while (p < end) {
122 			if (*p == '\\')
123 				esc_count++;
124 			p = parse_escape(p, &v, end, bits, token->pos);
125 			if (len < MAX_STRING)
126 				buffer[len] = v;
127 			len++;
128 		}
129 		token = token->next;
130 	}
131 	if (len > MAX_STRING) {
132 		warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", len, MAX_STRING);
133 		len = MAX_STRING;
134 	}
135 
136 	if (esc_count || len >= string->length) {
137 		if (string->immutable || len >= string->length)	/* can't cannibalize */
138 			string = __alloc_string(len+1);
139 		string->length = len+1;
140 		memcpy(string->data, buffer, len);
141 		string->data[len] = '\0';
142 	}
143 	expr->string = string;
144 	expr->wide = is_wide;
145 	return token;
146 }
147