xref: /illumos-gate/usr/src/tools/smatch/src/smatch_kernel_user_data.c (revision d70bcb7258b79267aad36309c42fd499e844458f)
1 /*
2  * Copyright (C) 2011 Dan Carpenter.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
16  */
17 
/*
 * There are a couple of checks that try to see if a variable
 * comes from the user.  It would be better to unify them
 * into one place.  Also, we should follow the data down
 * the call paths.  Hence this file.
 */
24 
25 #include "smatch.h"
26 #include "smatch_slist.h"
27 #include "smatch_extra.h"
28 
/* Owner id for the user-data states; assigned in register_kernel_user_data2(). */
static int my_id;
/* Owner id for the "this_function" marker; assigned in register_kernel_user_data3(). */
static int my_call_id;

/* Marker state that set_called() stores under the name "this_function". */
STATE(called);
/*
 * Set to true by the hooks in this file that see the function being
 * analysed obtain user data itself; reset in match_function_def().
 */
static bool func_gets_user_data;
34 
/*
 * String-to-integer helpers whose output argument is tagged as user data.
 * Each entry is hooked with match_user_copy() in
 * register_kernel_user_data2().
 *
 * Every entry must be followed by a comma: adjacent string literals are
 * concatenated by the compiler, which silently merges two names into one
 * bogus entry.
 */
static const char *kstr_funcs[] = {
	"kstrtoull", "kstrtoll", "kstrtoul", "kstrtol", "kstrtouint",
	"kstrtoint", "kstrtou64", "kstrtos64", "kstrtou32", "kstrtos32",
	"kstrtou16", "kstrtos16", "kstrtou8", "kstrtos8", "kstrtoull_from_user",
	"kstrtoll_from_user", "kstrtoul_from_user", "kstrtol_from_user",
	"kstrtouint_from_user", "kstrtoint_from_user", "kstrtou16_from_user",
	"kstrtos16_from_user", "kstrtou8_from_user", "kstrtos8_from_user",
	"kstrtou64_from_user", "kstrtos64_from_user", "kstrtou32_from_user",
	"kstrtos32_from_user",
};
45 
/*
 * Functions whose return value is treated as user data.  Looked up by
 * name in get_rl_from_function() and hooked in
 * register_kernel_user_data2().
 */
static const char *returns_user_data[] = {
	"simple_strtol",
	"simple_strtoll",
	"simple_strtoul",
	"simple_strtoull",
	"kvm_register_read",
	"nlmsg_data",
	"nla_data",
	"memdup_user",
	"kmap_atomic",
	"skb_network_header",
};
51 
52 static void set_points_to_user_data(struct expression *expr);
53 
/* Copy of the state tree taken by save_start_states(). */
static struct stree *start_states;
/* Holds start_states while an inlined function is processed. */
static struct stree_stack *saved_stack;

/*
 * AFTER_DEF_HOOK callback: keep a clone of the current state tree so
 * that param_set_to_user_data() can compare against it later.
 */
static void save_start_states(struct statement *stmt)
{
	struct stree *current = __get_cur_stree();

	start_states = clone_stree(current);
}
60 
/* AFTER_FUNC_HOOK callback: release the snapshot held in start_states. */
static void free_start_states(void)
{
	free_stree(&start_states);
}
65 
/*
 * INLINE_FN_START callback: park the current start_states on saved_stack
 * and clear the pointer.
 */
static void match_save_states(struct expression *expr)
{
	push_stree(&saved_stack, start_states);
	start_states = NULL;
}
71 
/*
 * INLINE_FN_END callback: free the current start_states and restore the
 * one that match_save_states() pushed on saved_stack.
 */
static void match_restore_states(struct expression *expr)
{
	free_stree(&start_states);
	start_states = pop_stree(&saved_stack);
}
77 
/*
 * Unmatched-state hook for my_id (see register_kernel_user_data2()):
 * always supplies an empty estate.
 */
static struct smatch_state *empty_state(struct sm_state *sm)
{
	return alloc_estate_empty();
}
82 
83 static void pre_merge_hook(struct sm_state *sm)
84 {
85 	struct smatch_state *user;
86 	struct smatch_state *extra;
87 	struct range_list *rl;
88 	sval_t dummy;
89 	sval_t sval_100;
90 
91 	sval_100.value = 100;
92 	sval_100.type = &int_ctype;
93 
94 	user = get_state(my_id, sm->name, sm->sym);
95 	if (!user)
96 		return;
97 	if (!__in_function_def && !estate_rl(sm->state)) {
98 		/*
99 		 * If the one side is capped and the other side is empty then
100 		 * let's just mark it as not-user data because the information
101 		 * isn't going to be useful.  How this looks is:
102 		 *
103 		 * if (user_var > trusted)
104 		 *	user_var = trusted;  <-- empty state
105 		 * else
106 		 *	<-- capped
107 		 *
108 		 * The problem is that sometimes things are capped to a literal
109 		 * and we'd like to keep the state in that case...  Ugh.  I've
110 		 * added a check which assumes that everything less than 100 is
111 		 * probably capped against a literal.
112 		 *
113 		 */
114 		if (is_capped_var_sym(sm->name, sm->sym) &&
115 		    sval_cmp(estate_max(user), sval_100) > 0)
116 			set_state(my_id, sm->name, sm->sym, alloc_estate_empty());
117 		return;
118 	}
119 	extra = get_state(SMATCH_EXTRA, sm->name, sm->sym);
120 	if (!extra || !estate_rl(extra))
121 		return;
122 	rl = rl_intersection(estate_rl(user), estate_rl(extra));
123 	if (rl_to_sval(rl, &dummy))
124 		rl = NULL;
125 	set_state(my_id, sm->name, sm->sym, alloc_estate_rl(clone_rl(rl)));
126 }
127 
128 static void extra_nomod_hook(const char *name, struct symbol *sym, struct expression *expr, struct smatch_state *state)
129 {
130 	struct smatch_state *user;
131 	struct range_list *rl;
132 
133 	user = get_state(my_id, name, sym);
134 	if (!user)
135 		return;
136 	rl = rl_intersection(estate_rl(user), estate_rl(state));
137 	if (rl_equiv(rl, estate_rl(user)))
138 		return;
139 	set_state(my_id, name, sym, alloc_estate_rl(rl));
140 }
141 
142 static void tag_inner_struct_members(struct expression *expr, struct symbol *member)
143 {
144 	struct expression *edge_member;
145 	struct symbol *base = get_real_base_type(member);
146 	struct symbol *tmp;
147 
148 	if (member->ident)
149 		expr = member_expression(expr, '.', member->ident);
150 
151 	FOR_EACH_PTR(base->symbol_list, tmp) {
152 		struct symbol *type;
153 
154 		type = get_real_base_type(tmp);
155 		if (!type)
156 			continue;
157 
158 		if (type->type == SYM_UNION || type->type == SYM_STRUCT) {
159 			tag_inner_struct_members(expr, tmp);
160 			continue;
161 		}
162 
163 		if (!tmp->ident)
164 			continue;
165 
166 		edge_member = member_expression(expr, '.', tmp->ident);
167 		set_state_expr(my_id, edge_member, alloc_estate_whole(type));
168 	} END_FOR_EACH_PTR(tmp);
169 }
170 
171 static void tag_struct_members(struct symbol *type, struct expression *expr)
172 {
173 	struct symbol *tmp;
174 	struct expression *member;
175 	int op = '*';
176 
177 	if (expr->type == EXPR_PREOP && expr->op == '&') {
178 		expr = strip_expr(expr->unop);
179 		op = '.';
180 	}
181 
182 	FOR_EACH_PTR(type->symbol_list, tmp) {
183 		type = get_real_base_type(tmp);
184 		if (!type)
185 			continue;
186 
187 		if (type->type == SYM_UNION || type->type == SYM_STRUCT) {
188 			tag_inner_struct_members(expr, tmp);
189 			continue;
190 		}
191 
192 		if (!tmp->ident)
193 			continue;
194 
195 		member = member_expression(expr, op, tmp->ident);
196 		set_state_expr(my_id, member, alloc_estate_whole(get_type(member)));
197 
198 		if (type->type == SYM_ARRAY)
199 			set_points_to_user_data(member);
200 	} END_FOR_EACH_PTR(tmp);
201 }
202 
203 static void tag_base_type(struct expression *expr)
204 {
205 	if (expr->type == EXPR_PREOP && expr->op == '&')
206 		expr = strip_expr(expr->unop);
207 	else
208 		expr = deref_expression(expr);
209 	set_state_expr(my_id, expr, alloc_estate_whole(get_type(expr)));
210 }
211 
212 static void tag_as_user_data(struct expression *expr)
213 {
214 	struct symbol *type;
215 
216 	expr = strip_expr(expr);
217 
218 	type = get_type(expr);
219 	if (!type || type->type != SYM_PTR)
220 		return;
221 	type = get_real_base_type(type);
222 	if (!type)
223 		return;
224 	if (type == &void_ctype) {
225 		set_state_expr(my_id, deref_expression(expr), alloc_estate_whole(&ulong_ctype));
226 		return;
227 	}
228 	if (type->type == SYM_BASETYPE)
229 		tag_base_type(expr);
230 	if (type->type == SYM_STRUCT || type->type == SYM_UNION) {
231 		if (expr->type != EXPR_PREOP || expr->op != '&')
232 			expr = deref_expression(expr);
233 		else
234 			set_state_expr(my_id, deref_expression(expr), alloc_estate_whole(&ulong_ctype));
235 		tag_struct_members(type, expr);
236 	}
237 }
238 
239 static void match_user_copy(const char *fn, struct expression *expr, void *_param)
240 {
241 	int param = PTR_INT(_param);
242 	struct expression *dest;
243 
244 	func_gets_user_data = true;
245 
246 	dest = get_argument_from_call_expr(expr->args, param);
247 	dest = strip_expr(dest);
248 	if (!dest)
249 		return;
250 	tag_as_user_data(dest);
251 }
252 
/*
 * Return 1 if the printed form of @expr contains "->attr.name",
 * otherwise 0.
 */
static int is_dev_attr_name(struct expression *expr)
{
	char *text = expr_to_str(expr);
	int found;

	if (!text)
		return 0;

	found = strstr(text, "->attr.name") != NULL;
	free_string(text);
	return found;
}
266 
267 static int ends_in_n(struct expression *expr)
268 {
269 	struct string *str;
270 
271 	if (!expr)
272 		return 0;
273 	if (expr->type != EXPR_STRING || !expr->string)
274 		return 0;
275 
276 	str = expr->string;
277 	if (str->length < 3)
278 		return 0;
279 
280 	if (str->data[str->length - 3] == '%' &&
281 	    str->data[str->length - 2] == 'n')
282 		return 1;
283 	return 0;
284 }
285 
286 static void match_sscanf(const char *fn, struct expression *expr, void *unused)
287 {
288 	struct expression *str, *format, *arg;
289 	int i, last;
290 
291 	func_gets_user_data = true;
292 
293 	str = get_argument_from_call_expr(expr->args, 0);
294 	if (is_dev_attr_name(str))
295 		return;
296 
297 	format = get_argument_from_call_expr(expr->args, 1);
298 	if (is_dev_attr_name(format))
299 		return;
300 
301 	last = ptr_list_size((struct ptr_list *)expr->args) - 1;
302 
303 	i = -1;
304 	FOR_EACH_PTR(expr->args, arg) {
305 		i++;
306 		if (i < 2)
307 			continue;
308 		if (i == last && ends_in_n(format))
309 			continue;
310 		tag_as_user_data(arg);
311 	} END_FOR_EACH_PTR(arg);
312 }
313 
314 static int is_skb_data(struct expression *expr)
315 {
316 	struct symbol *sym;
317 
318 	if (!expr)
319 		return 0;
320 
321 	if (expr->type == EXPR_BINOP && expr->op == '+')
322 		return is_skb_data(expr->left);
323 
324 	expr = strip_expr(expr);
325 	if (!expr)
326 		return 0;
327 	if (expr->type != EXPR_DEREF || expr->op != '.')
328 		return 0;
329 
330 	if (!expr->member)
331 		return 0;
332 	if (strcmp(expr->member->name, "data") != 0)
333 		return 0;
334 
335 	sym = expr_to_sym(expr->deref);
336 	if (!sym)
337 		return 0;
338 	sym = get_real_base_type(sym);
339 	if (!sym || sym->type != SYM_PTR)
340 		return 0;
341 	sym = get_real_base_type(sym);
342 	if (!sym || sym->type != SYM_STRUCT || !sym->ident)
343 		return 0;
344 	if (strcmp(sym->ident->name, "sk_buff") != 0)
345 		return 0;
346 
347 	return 1;
348 }
349 
350 static int get_rl_from_function(struct expression *expr, struct range_list **rl)
351 {
352 	int i;
353 
354 	if (expr->type != EXPR_CALL || expr->fn->type != EXPR_SYMBOL ||
355 	    !expr->fn->symbol_name || !expr->fn->symbol_name->name)
356 		return 0;
357 
358 	for (i = 0; i < ARRAY_SIZE(returns_user_data); i++) {
359 		if (strcmp(expr->fn->symbol_name->name, returns_user_data[i]) == 0) {
360 			*rl = alloc_whole_rl(get_type(expr));
361 			return 1;
362 		}
363 	}
364 	return 0;
365 }
366 
367 int points_to_user_data(struct expression *expr)
368 {
369 	struct smatch_state *state;
370 	struct range_list *rl;
371 	char buf[256];
372 	struct symbol *sym;
373 	char *name;
374 	int ret = 0;
375 
376 	expr = strip_expr(expr);
377 	if (!expr)
378 		return 0;
379 	if (is_skb_data(expr))
380 		return 1;
381 	if (get_rl_from_function(expr, &rl))
382 		return 1;
383 
384 	if (expr->type == EXPR_BINOP && expr->op == '+') {
385 		if (points_to_user_data(expr->left))
386 			return 1;
387 		if (points_to_user_data(expr->right))
388 			return 1;
389 		return 0;
390 	}
391 
392 	name = expr_to_var_sym(expr, &sym);
393 	if (!name || !sym)
394 		goto free;
395 	snprintf(buf, sizeof(buf), "*%s", name);
396 	state = get_state(my_id, buf, sym);
397 	if (state && estate_rl(state))
398 		ret = 1;
399 free:
400 	free_string(name);
401 	return ret;
402 }
403 
404 static void set_points_to_user_data(struct expression *expr)
405 {
406 	char *name;
407 	struct symbol *sym;
408 	char buf[256];
409 
410 	name = expr_to_var_sym(expr, &sym);
411 	if (!name || !sym)
412 		goto free;
413 	snprintf(buf, sizeof(buf), "*%s", name);
414 	set_state(my_id, buf, sym, alloc_estate_whole(&llong_ctype));
415 free:
416 	free_string(name);
417 }
418 
419 static int comes_from_skb_data(struct expression *expr)
420 {
421 	expr = strip_expr(expr);
422 	if (!expr || expr->type != EXPR_PREOP || expr->op != '*')
423 		return 0;
424 
425 	expr = strip_expr(expr->unop);
426 	if (!expr)
427 		return 0;
428 	if (expr->type == EXPR_BINOP && expr->op == '+')
429 		expr = strip_expr(expr->left);
430 
431 	return is_skb_data(expr);
432 }
433 
434 static int handle_struct_assignment(struct expression *expr)
435 {
436 	struct expression *right;
437 	struct symbol *left_type, *right_type;
438 
439 	left_type = get_type(expr->left);
440 	if (!left_type || left_type->type != SYM_PTR)
441 		return 0;
442 	left_type = get_real_base_type(left_type);
443 	if (!left_type)
444 		return 0;
445 	if (left_type->type != SYM_STRUCT &&
446 	    left_type->type != SYM_UNION)
447 		return 0;
448 
449 	/*
450 	 * Ignore struct to struct assignments because for those we look at the
451 	 * individual members.
452 	 */
453 	right = strip_expr(expr->right);
454 	right_type = get_type(right);
455 	if (!right_type || right_type->type != SYM_PTR)
456 		return 0;
457 
458 	/* If we are assigning struct members then normally that is handled
459 	 * by fake assignments, however if we cast one struct to a different
460 	 * of struct then we handle that here.
461 	 */
462 	right_type = get_real_base_type(right_type);
463 	if (right_type == left_type)
464 		return 0;
465 
466 	if (!points_to_user_data(right))
467 		return 0;
468 
469 	tag_as_user_data(expr->left);
470 	return 1;
471 }
472 
473 static int handle_get_user(struct expression *expr)
474 {
475 	char *name;
476 	int ret = 0;
477 
478 	name = get_macro_name(expr->pos);
479 	if (!name || strcmp(name, "get_user") != 0)
480 		return 0;
481 
482 	name = expr_to_var(expr->right);
483 	if (!name || strcmp(name, "__val_gu") != 0)
484 		goto free;
485 	set_state_expr(my_id, expr->left, alloc_estate_whole(get_type(expr->left)));
486 	ret = 1;
487 free:
488 	free_string(name);
489 	return ret;
490 }
491 
492 static void match_assign(struct expression *expr)
493 {
494 	struct range_list *rl;
495 
496 	if (is_fake_call(expr->right))
497 		goto clear_old_state;
498 	if (handle_get_user(expr))
499 		return;
500 	if (points_to_user_data(expr->right))
501 		set_points_to_user_data(expr->left);
502 	if (handle_struct_assignment(expr))
503 		return;
504 
505 	if (!get_user_rl(expr->right, &rl))
506 		goto clear_old_state;
507 
508 	rl = cast_rl(get_type(expr->left), rl);
509 	set_state_expr(my_id, expr->left, alloc_estate_rl(rl));
510 
511 	return;
512 
513 clear_old_state:
514 	if (get_state_expr(my_id, expr->left))
515 		set_state_expr(my_id, expr->left, alloc_estate_empty());
516 }
517 
518 static void handle_eq_noteq(struct expression *expr)
519 {
520 	struct smatch_state *left_orig, *right_orig;
521 
522 	left_orig = get_state_expr(my_id, expr->left);
523 	right_orig = get_state_expr(my_id, expr->right);
524 
525 	if (!left_orig && !right_orig)
526 		return;
527 	if (left_orig && right_orig)
528 		return;
529 
530 	if (left_orig) {
531 		set_true_false_states_expr(my_id, expr->left,
532 				expr->op == SPECIAL_EQUAL ? alloc_estate_empty() : NULL,
533 				expr->op == SPECIAL_EQUAL ? NULL : alloc_estate_empty());
534 	} else {
535 		set_true_false_states_expr(my_id, expr->right,
536 				expr->op == SPECIAL_EQUAL ? alloc_estate_empty() : NULL,
537 				expr->op == SPECIAL_EQUAL ? NULL : alloc_estate_empty());
538 	}
539 }
540 
541 static void handle_unsigned_lt_gt(struct expression *expr)
542 {
543 	struct symbol *type;
544 	struct range_list *left;
545 	struct range_list *right;
546 	struct range_list *non_negative;
547 	sval_t min, minus_one;
548 
549 	/*
550 	 * conditions are mostly handled by smatch_extra.c.  The special case
551 	 * here is that say you have if (user_int < unknown_u32) {
552 	 * In Smatch extra we say that, We have no idea what value
553 	 * unknown_u32 is so the only thin we can say for sure is that
554 	 * user_int is not -1 (UINT_MAX).  But in check_user_data2.c we should
555 	 * assume that unless unknown_u32 is user data, it's probably less than
556 	 * INT_MAX.
557 	 *
558 	 */
559 
560 	type = get_type(expr);
561 	if (!type_unsigned(type))
562 		return;
563 
564 	/*
565 	 * Assume if (user < trusted) { ... because I am lazy and because this
566 	 * is the correct way to write code.
567 	 */
568 	if (!get_user_rl(expr->left, &left))
569 		return;
570 	if (get_user_rl(expr->right, &right))
571 		return;
572 
573 	if (!sval_is_negative(rl_min(left)))
574 		return;
575 	min = rl_min(left);
576 	minus_one.type = rl_type(left);
577 	minus_one.value = -1;
578 	non_negative = remove_range(left, min, minus_one);
579 
580 	switch (expr->op) {
581 	case '<':
582 	case SPECIAL_UNSIGNED_LT:
583 	case SPECIAL_LTE:
584 	case SPECIAL_UNSIGNED_LTE:
585 		set_true_false_states_expr(my_id, expr->left,
586 					   alloc_estate_rl(non_negative), NULL);
587 		break;
588 	case '>':
589 	case SPECIAL_UNSIGNED_GT:
590 	case SPECIAL_GTE:
591 	case SPECIAL_UNSIGNED_GTE:
592 		set_true_false_states_expr(my_id, expr->left,
593 					   NULL, alloc_estate_rl(non_negative));
594 		break;
595 	}
596 }
597 
598 static void match_condition(struct expression *expr)
599 {
600 	if (expr->type != EXPR_COMPARE)
601 		return;
602 
603 	if (expr->op == SPECIAL_EQUAL ||
604 	    expr->op == SPECIAL_NOTEQUAL) {
605 		handle_eq_noteq(expr);
606 		return;
607 	}
608 
609 	handle_unsigned_lt_gt(expr);
610 }
611 
/*
 * Assign hook for the returns_user_data[] functions: the left side of
 * the assignment is tagged as user data and as pointing to user data.
 */
static void match_user_assign_function(const char *fn, struct expression *expr, void *unused)
{
	tag_as_user_data(expr->left);
	set_points_to_user_data(expr->left);
}
617 
/*
 * Function hook for the returns_user_data[] functions: note that the
 * current function obtains user data itself.
 */
static void match_returns_user_rl(const char *fn, struct expression *expr, void *unused)
{
	func_gets_user_data = true;
}
622 
623 static int get_user_macro_rl(struct expression *expr, struct range_list **rl)
624 {
625 	struct expression *parent;
626 	char *macro;
627 
628 	if (!expr)
629 		return 0;
630 
631 	macro = get_macro_name(expr->pos);
632 	if (!macro)
633 		return 0;
634 
635 	/* handle ntohl(foo[i]) where "i" is trusted */
636 	parent = expr_get_parent_expr(expr);
637 	while (parent && parent->type != EXPR_BINOP)
638 		parent = expr_get_parent_expr(parent);
639 	if (parent && parent->type == EXPR_BINOP) {
640 		char *parent_macro = get_macro_name(parent->pos);
641 
642 		if (parent_macro && strcmp(macro, parent_macro) == 0)
643 			return 0;
644 	}
645 
646 	if (strcmp(macro, "ntohl") == 0) {
647 		*rl = alloc_whole_rl(&uint_ctype);
648 		return 1;
649 	}
650 	if (strcmp(macro, "ntohs") == 0) {
651 		*rl = alloc_whole_rl(&ushort_ctype);
652 		return 1;
653 	}
654 	return 0;
655 }
656 
/* Context handed to returned_rl_callback() by db_returned_user_rl(). */
struct db_info {
	struct range_list *rl;		/* union of the ranges collected so far */
	struct expression *call;	/* the call expression being looked up */
};
661 static int returned_rl_callback(void *_info, int argc, char **argv, char **azColName)
662 {
663 	struct db_info *db_info = _info;
664 	struct range_list *rl;
665 	char *return_ranges = argv[0];
666 	char *user_ranges = argv[1];
667 	struct expression *arg;
668 	int comparison;
669 
670 	if (argc != 2)
671 		return 0;
672 
673 	call_results_to_rl(db_info->call, get_type(db_info->call), user_ranges, &rl);
674 	if (str_to_comparison_arg(return_ranges, db_info->call, &comparison, &arg) &&
675 	    comparison == SPECIAL_EQUAL) {
676 		struct range_list *orig_rl;
677 
678 		if (!get_user_rl(arg, &orig_rl))
679 			return 0;
680 		rl = rl_intersection(rl, orig_rl);
681 		if (!rl)
682 			return 0;
683 	}
684 	db_info->rl = rl_union(db_info->rl, rl);
685 
686 	return 0;
687 }
688 
/*
 * Return 1 if any of this check's states in the current state tree
 * belongs to @sym, otherwise 0.
 */
static int has_user_data(struct symbol *sym)
{
	struct sm_state *tmp;

	FOR_EACH_MY_SM(my_id, __get_cur_stree(), tmp) {
		if (tmp->sym == sym)
			return 1;
	} END_FOR_EACH_SM(tmp);
	return 0;
}
699 
700 static int we_pass_user_data(struct expression *call)
701 {
702 	struct expression *arg;
703 	struct symbol *sym;
704 
705 	FOR_EACH_PTR(call->args, arg) {
706 		sym = expr_to_sym(arg);
707 		if (!sym)
708 			continue;
709 		if (has_user_data(sym))
710 			return 1;
711 	} END_FOR_EACH_PTR(arg);
712 
713 	return 0;
714 }
715 
716 static int db_returned_user_rl(struct expression *call, struct range_list **rl)
717 {
718 	struct db_info db_info = {};
719 
720 	/* for function pointers assume everything is used */
721 	if (call->fn->type != EXPR_SYMBOL)
722 		return 0;
723 	if (is_fake_call(call))
724 		return 0;
725 
726 	db_info.call = call;
727 	run_sql(&returned_rl_callback, &db_info,
728 		"select return, value from return_states where %s and type = %d and parameter = -1 and key = '$';",
729 		get_static_filter(call->fn->symbol), USER_DATA3_SET);
730 	if (db_info.rl) {
731 		func_gets_user_data = true;
732 		*rl = db_info.rl;
733 		return 1;
734 	}
735 
736 	run_sql(&returned_rl_callback, &db_info,
737 		"select return, value from return_states where %s and type = %d and parameter = -1 and key = '$';",
738 		get_static_filter(call->fn->symbol), USER_DATA3);
739 	if (db_info.rl) {
740 		if (!we_pass_user_data(call))
741 			return 0;
742 		*rl = db_info.rl;
743 		return 1;
744 	}
745 
746 	return 0;
747 }
748 
/* Return the result of get_all_states_stree() for this check's id. */
struct stree *get_user_stree(void)
{
	return get_all_states_stree(my_id);
}
753 
754 static int user_data_flag;
755 static int no_user_data_flag;
756 static struct range_list *var_user_rl(struct expression *expr)
757 {
758 	struct smatch_state *state;
759 	struct range_list *rl;
760 	struct range_list *absolute_rl;
761 
762 	if (expr->type == EXPR_BINOP && expr->op == '%') {
763 		struct range_list *left, *right;
764 
765 		if (!get_user_rl(expr->right, &right))
766 			return NULL;
767 		get_absolute_rl(expr->left, &left);
768 		rl = rl_binop(left, '%', right);
769 		goto found;
770 	}
771 
772 	if (!option_spammy && expr->type == EXPR_BINOP && expr->op == '/') {
773 		struct range_list *left = NULL;
774 		struct range_list *right = NULL;
775 		struct range_list *abs_right;
776 
777 		/*
778 		 * The specific bug I'm dealing with is:
779 		 *
780 		 * foo = capped_user / unknown;
781 		 *
782 		 * Instead of just saying foo is now entirely user_rl we should
783 		 * probably say instead that it is not at all user data.
784 		 *
785 		 */
786 
787 		get_user_rl(expr->left, &left);
788 		get_user_rl(expr->right, &right);
789 		get_absolute_rl(expr->right, &abs_right);
790 
791 		if (left && !right) {
792 			rl = rl_binop(left, '/', abs_right);
793 			if (sval_cmp(rl_max(left), rl_max(rl)) < 0)
794 				no_user_data_flag = 1;
795 		}
796 
797 		return NULL;
798 	}
799 
800 	if (get_rl_from_function(expr, &rl))
801 		goto found;
802 
803 	if (get_user_macro_rl(expr, &rl))
804 		goto found;
805 
806 	if (comes_from_skb_data(expr)) {
807 		rl = alloc_whole_rl(get_type(expr));
808 		goto found;
809 	}
810 
811 	state = get_state_expr(my_id, expr);
812 	if (state && estate_rl(state)) {
813 		rl = estate_rl(state);
814 		goto found;
815 	}
816 
817 	if (expr->type == EXPR_CALL && db_returned_user_rl(expr, &rl))
818 		goto found;
819 
820 	if (is_array(expr)) {
821 		struct expression *array = get_array_base(expr);
822 
823 		if (!get_state_expr(my_id, array)) {
824 			no_user_data_flag = 1;
825 			return NULL;
826 		}
827 	}
828 
829 	if (expr->type == EXPR_PREOP && expr->op == '*' &&
830 	    is_user_rl(expr->unop)) {
831 		rl = var_to_absolute_rl(expr);
832 		goto found;
833 	}
834 
835 	return NULL;
836 found:
837 	user_data_flag = 1;
838 	absolute_rl = var_to_absolute_rl(expr);
839 	return clone_rl(rl_intersection(rl, absolute_rl));
840 }
841 
842 int get_user_rl(struct expression *expr, struct range_list **rl)
843 {
844 	user_data_flag = 0;
845 	no_user_data_flag = 0;
846 	custom_get_absolute_rl(expr, &var_user_rl, rl);
847 	if (!user_data_flag || no_user_data_flag)
848 		*rl = NULL;
849 
850 	return !!*rl;
851 }
852 
/*
 * Same as get_user_rl(), but with option_spammy incremented for the
 * duration of the call.  var_user_rl() skips its special handling of
 * division when option_spammy is non-zero.
 */
int get_user_rl_spammy(struct expression *expr, struct range_list **rl)
{
	int ret;

	option_spammy++;
	ret = get_user_rl(expr, rl);
	option_spammy--;

	return ret;
}
863 
/*
 * Return the result of get_user_rl_spammy() for @expr; the range itself
 * is discarded.
 */
int is_user_rl(struct expression *expr)
{
	struct range_list *tmp;

	return get_user_rl_spammy(expr, &tmp);
}
870 
871 int get_user_rl_var_sym(const char *name, struct symbol *sym, struct range_list **rl)
872 {
873 	struct smatch_state *state;
874 
875 	state = get_state(my_id, name, sym);
876 	if (state && estate_rl(state)) {
877 		*rl = estate_rl(state);
878 		return 1;
879 	}
880 	return 0;
881 }
882 
883 static void match_call_info(struct expression *expr)
884 {
885 	struct range_list *rl;
886 	struct expression *arg;
887 	struct symbol *type;
888 	int i = 0;
889 
890 	i = -1;
891 	FOR_EACH_PTR(expr->args, arg) {
892 		i++;
893 		type = get_arg_type(expr->fn, i);
894 
895 		if (!get_user_rl(arg, &rl))
896 			continue;
897 
898 		rl = cast_rl(type, rl);
899 		sql_insert_caller_info(expr, USER_DATA3, i, "$", show_rl(rl));
900 	} END_FOR_EACH_PTR(arg);
901 }
902 
903 static int is_struct_ptr(struct symbol *sym)
904 {
905 	struct symbol *type;
906 
907 	if (!sym)
908 		return 0;
909 	type = get_real_base_type(sym);
910 	if (!type || type->type != SYM_PTR)
911 		return 0;
912 	type = get_real_base_type(type);
913 	if (!type || type->type != SYM_STRUCT)
914 		return 0;
915 	return 1;
916 }
917 
918 static void struct_member_callback(struct expression *call, int param, char *printed_name, struct sm_state *sm)
919 {
920 	struct smatch_state *state;
921 	struct range_list *rl;
922 	struct symbol *type;
923 
924 	/*
925 	 * Smatch uses a hack where if we get an unsigned long we say it's
926 	 * both user data and it points to user data.  But if we pass it to a
927 	 * function which takes an int, then it's just user data.  There's not
928 	 * enough bytes for it to be a pointer.
929 	 *
930 	 */
931 	type = get_arg_type(call->fn, param);
932 	if (type && type_bits(type) < type_bits(&ptr_ctype))
933 		return;
934 
935 	if (strcmp(sm->state->name, "") == 0)
936 		return;
937 
938 	if (strcmp(printed_name, "*$") == 0 &&
939 	    is_struct_ptr(sm->sym))
940 		return;
941 
942 	state = get_state(SMATCH_EXTRA, sm->name, sm->sym);
943 	if (!state || !estate_rl(state))
944 		rl = estate_rl(sm->state);
945 	else
946 		rl = rl_intersection(estate_rl(sm->state), estate_rl(state));
947 
948 	sql_insert_caller_info(call, USER_DATA3, param, printed_name, show_rl(rl));
949 }
950 
951 static void set_param_user_data(const char *name, struct symbol *sym, char *key, char *value)
952 {
953 	struct range_list *rl = NULL;
954 	struct smatch_state *state;
955 	struct symbol *type;
956 	char fullname[256];
957 
958 	if (strcmp(key, "*$") == 0)
959 		snprintf(fullname, sizeof(fullname), "*%s", name);
960 	else if (strncmp(key, "$", 1) == 0)
961 		snprintf(fullname, 256, "%s%s", name, key + 1);
962 	else
963 		return;
964 
965 	type = get_member_type_from_key(symbol_expression(sym), key);
966 
967 	/* if the caller passes a void pointer with user data */
968 	if (strcmp(key, "*$") == 0 && type && type != &void_ctype) {
969 		struct expression *expr = symbol_expression(sym);
970 
971 		tag_as_user_data(expr);
972 		set_points_to_user_data(expr);
973 		return;
974 	}
975 	str_to_rl(type, value, &rl);
976 	state = alloc_estate_rl(rl);
977 	set_state(my_id, fullname, sym, state);
978 }
979 
/*
 * caller_info hook for INTERNAL rows (see register_kernel_user_data3()):
 * store the "called" marker under the name "this_function".
 */
static void set_called(const char *name, struct symbol *sym, char *key, char *value)
{
	set_state(my_call_id, "this_function", NULL, &called);
}
984 
985 static void match_syscall_definition(struct symbol *sym)
986 {
987 	struct symbol *arg;
988 	char *macro;
989 	char *name;
990 	int is_syscall = 0;
991 
992 	macro = get_macro_name(sym->pos);
993 	if (macro &&
994 	    (strncmp("SYSCALL_DEFINE", macro, strlen("SYSCALL_DEFINE")) == 0 ||
995 	     strncmp("COMPAT_SYSCALL_DEFINE", macro, strlen("COMPAT_SYSCALL_DEFINE")) == 0))
996 		is_syscall = 1;
997 
998 	name = get_function();
999 	if (!option_no_db && get_state(my_call_id, "this_function", NULL) != &called) {
1000 		if (name && strncmp(name, "sys_", 4) == 0)
1001 			is_syscall = 1;
1002 	}
1003 
1004 	if (name && strncmp(name, "compat_sys_", 11) == 0)
1005 		is_syscall = 1;
1006 
1007 	if (!is_syscall)
1008 		return;
1009 
1010 	FOR_EACH_PTR(sym->ctype.base_type->arguments, arg) {
1011 		set_state(my_id, arg->ident->name, arg, alloc_estate_whole(get_real_base_type(arg)));
1012 	} END_FOR_EACH_PTR(arg);
1013 }
1014 
1015 static void set_to_user_data(struct expression *expr, char *key, char *value)
1016 {
1017 	char *name;
1018 	struct symbol *sym;
1019 	struct symbol *type;
1020 	struct range_list *rl = NULL;
1021 
1022 	type = get_member_type_from_key(expr, key);
1023 	name = get_variable_from_key(expr, key, &sym);
1024 	if (!name || !sym)
1025 		goto free;
1026 
1027 	call_results_to_rl(expr, type, value, &rl);
1028 
1029 	set_state(my_id, name, sym, alloc_estate_rl(rl));
1030 free:
1031 	free_string(name);
1032 
1033 }
1034 
1035 static void returns_param_user_data(struct expression *expr, int param, char *key, char *value)
1036 {
1037 	struct expression *arg;
1038 	struct expression *call;
1039 
1040 	call = expr;
1041 	while (call->type == EXPR_ASSIGNMENT)
1042 		call = strip_expr(call->right);
1043 	if (call->type != EXPR_CALL)
1044 		return;
1045 
1046 	if (!we_pass_user_data(call))
1047 		return;
1048 
1049 	if (param == -1) {
1050 		if (expr->type != EXPR_ASSIGNMENT)
1051 			return;
1052 		set_to_user_data(expr->left, key, value);
1053 		return;
1054 	}
1055 
1056 	arg = get_argument_from_call_expr(call->args, param);
1057 	if (!arg)
1058 		return;
1059 	set_to_user_data(arg, key, value);
1060 }
1061 
1062 static void returns_param_user_data_set(struct expression *expr, int param, char *key, char *value)
1063 {
1064 	struct expression *arg;
1065 
1066 	func_gets_user_data = true;
1067 
1068 	if (param == -1) {
1069 		if (expr->type != EXPR_ASSIGNMENT)
1070 			return;
1071 		if (strcmp(key, "*$") == 0) {
1072 			set_points_to_user_data(expr->left);
1073 			tag_as_user_data(expr->left);
1074 		} else {
1075 			set_to_user_data(expr->left, key, value);
1076 		}
1077 		return;
1078 	}
1079 
1080 	while (expr->type == EXPR_ASSIGNMENT)
1081 		expr = strip_expr(expr->right);
1082 	if (expr->type != EXPR_CALL)
1083 		return;
1084 
1085 	arg = get_argument_from_call_expr(expr->args, param);
1086 	if (!arg)
1087 		return;
1088 	set_to_user_data(arg, key, value);
1089 }
1090 
/*
 * Return 1 if any of the possible states of @sm has no range list,
 * otherwise 0.
 */
static int has_empty_state(struct sm_state *sm)
{
	struct sm_state *tmp;

	FOR_EACH_PTR(sm->possible, tmp) {
		if (!estate_rl(tmp->state))
			return 1;
	} END_FOR_EACH_PTR(tmp);

	return 0;
}
1102 
1103 static void param_set_to_user_data(int return_id, char *return_ranges, struct expression *expr)
1104 {
1105 	struct sm_state *sm;
1106 	struct smatch_state *start_state;
1107 	struct range_list *rl;
1108 	int param;
1109 	char *return_str;
1110 	const char *param_name;
1111 	struct symbol *ret_sym;
1112 	bool return_found = false;
1113 
1114 	expr = strip_expr(expr);
1115 	return_str = expr_to_str(expr);
1116 	ret_sym = expr_to_sym(expr);
1117 
1118 	FOR_EACH_MY_SM(my_id, __get_cur_stree(), sm) {
1119 		if (has_empty_state(sm))
1120 			continue;
1121 
1122 		param = get_param_num_from_sym(sm->sym);
1123 		if (param < 0)
1124 			continue;
1125 
1126 		/* The logic here was that if we were passed in a user data then
1127 		 * we don't record that.  It's like the difference between
1128 		 * param_filter and param_set.  When I think about it, I'm not
1129 		 * sure it actually works.  It's probably harmless because we
1130 		 * checked earlier that we're not returning a parameter...
1131 		 * Let's mark this as a TODO.
1132 		 */
1133 		start_state = get_state_stree(start_states, my_id, sm->name, sm->sym);
1134 		if (start_state && rl_equiv(estate_rl(sm->state), estate_rl(start_state)))
1135 			continue;
1136 
1137 		param_name = get_param_name(sm);
1138 		if (!param_name)
1139 			continue;
1140 		if (strcmp(param_name, "$") == 0)  /* The -1 param is handled after the loop */
1141 			continue;
1142 
1143 		sql_insert_return_states(return_id, return_ranges,
1144 					 func_gets_user_data ? USER_DATA3_SET : USER_DATA3,
1145 					 param, param_name, show_rl(estate_rl(sm->state)));
1146 	} END_FOR_EACH_SM(sm);
1147 
1148 	if (points_to_user_data(expr)) {
1149 		sql_insert_return_states(return_id, return_ranges,
1150 					 (is_skb_data(expr) || !func_gets_user_data) ?
1151 					 USER_DATA3_SET : USER_DATA3,
1152 					 -1, "*$", "");
1153 		goto free_string;
1154 	}
1155 
1156 
1157 	FOR_EACH_MY_SM(my_id, __get_cur_stree(), sm) {
1158 		if (!ret_sym)
1159 			break;
1160 		if (ret_sym != sm->sym)
1161 			continue;
1162 
1163 		param_name = state_name_to_param_name(sm->name, return_str);
1164 		if (!param_name)
1165 			continue;
1166 		if (strcmp(param_name, "$") == 0)
1167 			return_found = true;
1168 		sql_insert_return_states(return_id, return_ranges,
1169 					 func_gets_user_data ? USER_DATA3_SET : USER_DATA3,
1170 					 -1, param_name, show_rl(estate_rl(sm->state)));
1171 	} END_FOR_EACH_SM(sm);
1172 
1173 
1174 	if (!return_found && get_user_rl(expr, &rl)) {
1175 		sql_insert_return_states(return_id, return_ranges,
1176 					 func_gets_user_data ? USER_DATA3_SET : USER_DATA3,
1177 					 -1, "$", show_rl(rl));
1178 		goto free_string;
1179 	}
1180 
1181 free_string:
1182 	free_string(return_str);
1183 }
1184 
/* Saved func_gets_user_data values, one per inlined function in progress. */
static struct int_stack *gets_data_stack;

/* FUNC_DEF_HOOK callback: start each function with the flag cleared. */
static void match_function_def(struct symbol *sym)
{
	func_gets_user_data = false;
}
1190 
/* INLINE_FN_START callback: push the current func_gets_user_data value. */
static void match_inline_start(struct expression *expr)
{
	push_int(&gets_data_stack, func_gets_user_data);
}
1195 
/* INLINE_FN_END callback: restore the func_gets_user_data value pushed at inline start. */
static void match_inline_end(struct expression *expr)
{
	func_gets_user_data = pop_int(&gets_data_stack);
}
1200 
1201 void register_kernel_user_data2(int id)
1202 {
1203 	int i;
1204 
1205 	my_id = id;
1206 
1207 	if (option_project != PROJ_KERNEL)
1208 		return;
1209 
1210 	add_hook(&match_function_def, FUNC_DEF_HOOK);
1211 	add_hook(&match_inline_start, INLINE_FN_START);
1212 	add_hook(&match_inline_end, INLINE_FN_END);
1213 
1214 	add_hook(&save_start_states, AFTER_DEF_HOOK);
1215 	add_hook(&free_start_states, AFTER_FUNC_HOOK);
1216 	add_hook(&match_save_states, INLINE_FN_START);
1217 	add_hook(&match_restore_states, INLINE_FN_END);
1218 
1219 	add_unmatched_state_hook(my_id, &empty_state);
1220 	add_extra_nomod_hook(&extra_nomod_hook);
1221 	add_pre_merge_hook(my_id, &pre_merge_hook);
1222 	add_merge_hook(my_id, &merge_estates);
1223 
1224 	add_function_hook("copy_from_user", &match_user_copy, INT_PTR(0));
1225 	add_function_hook("__copy_from_user", &match_user_copy, INT_PTR(0));
1226 	add_function_hook("memcpy_fromiovec", &match_user_copy, INT_PTR(0));
1227 	for (i = 0; i < ARRAY_SIZE(kstr_funcs); i++)
1228 		add_function_hook(kstr_funcs[i], &match_user_copy, INT_PTR(2));
1229 	add_function_hook("usb_control_msg", &match_user_copy, INT_PTR(6));
1230 
1231 	for (i = 0; i < ARRAY_SIZE(returns_user_data); i++) {
1232 		add_function_assign_hook(returns_user_data[i], &match_user_assign_function, NULL);
1233 		add_function_hook(returns_user_data[i], &match_returns_user_rl, NULL);
1234 	}
1235 
1236 	add_function_hook("sscanf", &match_sscanf, NULL);
1237 
1238 	add_hook(&match_syscall_definition, AFTER_DEF_HOOK);
1239 
1240 	add_hook(&match_assign, ASSIGNMENT_HOOK);
1241 	add_hook(&match_condition, CONDITION_HOOK);
1242 
1243 	add_hook(&match_call_info, FUNCTION_CALL_HOOK);
1244 	add_member_info_callback(my_id, struct_member_callback);
1245 	select_caller_info_hook(set_param_user_data, USER_DATA3);
1246 	select_return_states_hook(USER_DATA3, &returns_param_user_data);
1247 	select_return_states_hook(USER_DATA3_SET, &returns_param_user_data_set);
1248 	add_split_return_callback(&param_set_to_user_data);
1249 }
1250 
/*
 * Register the companion check under @id.  It records, via set_called(),
 * that INTERNAL caller_info rows exist for the current function; the
 * marker is read in match_syscall_definition().  The hook is installed
 * for kernel projects only.
 */
void register_kernel_user_data3(int id)
{
	my_call_id = id;

	if (option_project != PROJ_KERNEL)
		return;
	select_caller_info_hook(set_called, INTERNAL);
}
1259 
1260