xref: /illumos-gate/usr/src/tools/smatch/src/sparse-llvm.c (revision 856f710c9dc323b39da5935194d7928ffb99b67f)
1 /*
2  * Example usage:
3  *	./sparse-llvm hello.c | llc | as -o hello.o
4  */
5 
6 #include <llvm-c/Core.h>
7 #include <llvm-c/BitWriter.h>
8 #include <llvm-c/Analysis.h>
9 #include <llvm-c/Target.h>
10 
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <unistd.h>
14 #include <string.h>
15 #include <assert.h>
16 
17 #include "symbol.h"
18 #include "expression.h"
19 #include "linearize.h"
20 #include "flow.h"
21 
22 struct function {
23 	LLVMBuilderRef			builder;
24 	LLVMTypeRef			type;
25 	LLVMValueRef			fn;
26 	LLVMModuleRef			module;
27 };
28 
29 static inline bool symbol_is_fp_type(struct symbol *sym)
30 {
31 	if (!sym)
32 		return false;
33 
34 	return sym->ctype.base_type == &fp_type;
35 }
36 
37 static LLVMTypeRef symbol_type(LLVMModuleRef module, struct symbol *sym);
38 
39 static LLVMTypeRef func_return_type(LLVMModuleRef module, struct symbol *sym)
40 {
41 	return symbol_type(module, sym->ctype.base_type);
42 }
43 
44 static LLVMTypeRef sym_func_type(LLVMModuleRef module, struct symbol *sym)
45 {
46 	LLVMTypeRef *arg_type;
47 	LLVMTypeRef func_type;
48 	LLVMTypeRef ret_type;
49 	struct symbol *arg;
50 	int n_arg = 0;
51 
52 	/* to avoid strangeness with varargs [for now], we build
53 	 * the function and type anew, for each call.  This
54 	 * is probably wrong.  We should look up the
55 	 * symbol declaration info.
56 	 */
57 
58 	ret_type = func_return_type(module, sym);
59 
60 	/* count args, build argument type information */
61 	FOR_EACH_PTR(sym->arguments, arg) {
62 		n_arg++;
63 	} END_FOR_EACH_PTR(arg);
64 
65 	arg_type = calloc(n_arg, sizeof(LLVMTypeRef));
66 
67 	int idx = 0;
68 	FOR_EACH_PTR(sym->arguments, arg) {
69 		struct symbol *arg_sym = arg->ctype.base_type;
70 
71 		arg_type[idx++] = symbol_type(module, arg_sym);
72 	} END_FOR_EACH_PTR(arg);
73 	func_type = LLVMFunctionType(ret_type, arg_type, n_arg,
74 				     sym->variadic);
75 
76 	return func_type;
77 }
78 
79 static LLVMTypeRef sym_array_type(LLVMModuleRef module, struct symbol *sym)
80 {
81 	LLVMTypeRef elem_type;
82 	struct symbol *base_type;
83 
84 	base_type = sym->ctype.base_type;
85 	/* empty struct is undefined [6.7.2.1(8)] */
86 	assert(base_type->bit_size > 0);
87 
88 	elem_type = symbol_type(module, base_type);
89 	if (!elem_type)
90 		return NULL;
91 
92 	return LLVMArrayType(elem_type, sym->bit_size / base_type->bit_size);
93 }
94 
95 #define MAX_STRUCT_MEMBERS 64
96 
97 static LLVMTypeRef sym_struct_type(LLVMModuleRef module, struct symbol *sym)
98 {
99 	LLVMTypeRef elem_types[MAX_STRUCT_MEMBERS];
100 	struct symbol *member;
101 	char buffer[256];
102 	LLVMTypeRef ret;
103 	unsigned nr = 0;
104 
105 	snprintf(buffer, sizeof(buffer), "struct.%s", sym->ident ? sym->ident->name : "anno");
106 	ret = LLVMStructCreateNamed(LLVMGetGlobalContext(), buffer);
107 	/* set ->aux to avoid recursion */
108 	sym->aux = ret;
109 
110 	FOR_EACH_PTR(sym->symbol_list, member) {
111 		LLVMTypeRef member_type;
112 
113 		assert(nr < MAX_STRUCT_MEMBERS);
114 
115 		member_type = symbol_type(module, member);
116 
117 		elem_types[nr++] = member_type;
118 	} END_FOR_EACH_PTR(member);
119 
120 	LLVMStructSetBody(ret, elem_types, nr, 0 /* packed? */);
121 	return ret;
122 }
123 
124 static LLVMTypeRef sym_union_type(LLVMModuleRef module, struct symbol *sym)
125 {
126 	LLVMTypeRef elements;
127 	unsigned union_size;
128 
129 	/*
130 	 * There's no union support in the LLVM API so we treat unions as
131 	 * opaque structs. The downside is that we lose type information on the
132 	 * members but as LLVM doesn't care, neither do we.
133 	 */
134 	union_size = sym->bit_size / 8;
135 
136 	elements = LLVMArrayType(LLVMInt8Type(), union_size);
137 
138 	return LLVMStructType(&elements, 1, 0 /* packed? */);
139 }
140 
141 static LLVMTypeRef sym_ptr_type(LLVMModuleRef module, struct symbol *sym)
142 {
143 	LLVMTypeRef type;
144 
145 	/* 'void *' is treated like 'char *' */
146 	if (is_void_type(sym->ctype.base_type))
147 		type = LLVMInt8Type();
148 	else
149 		type = symbol_type(module, sym->ctype.base_type);
150 
151 	return LLVMPointerType(type, 0);
152 }
153 
154 static LLVMTypeRef sym_basetype_type(struct symbol *sym)
155 {
156 	LLVMTypeRef ret = NULL;
157 
158 	if (symbol_is_fp_type(sym)) {
159 		switch (sym->bit_size) {
160 		case 32:
161 			ret = LLVMFloatType();
162 			break;
163 		case 64:
164 			ret = LLVMDoubleType();
165 			break;
166 		case 80:
167 			ret = LLVMX86FP80Type();
168 			break;
169 		default:
170 			die("invalid bit size %d for type %d", sym->bit_size, sym->type);
171 			break;
172 		}
173 	} else {
174 		switch (sym->bit_size) {
175 		case -1:
176 			ret = LLVMVoidType();
177 			break;
178 		case 1:
179 			ret = LLVMInt1Type();
180 			break;
181 		case 8:
182 			ret = LLVMInt8Type();
183 			break;
184 		case 16:
185 			ret = LLVMInt16Type();
186 			break;
187 		case 32:
188 			ret = LLVMInt32Type();
189 			break;
190 		case 64:
191 			ret = LLVMInt64Type();
192 			break;
193 		default:
194 			die("invalid bit size %d for type %d", sym->bit_size, sym->type);
195 			break;
196 		}
197 	}
198 
199 	return ret;
200 }
201 
202 static LLVMTypeRef symbol_type(LLVMModuleRef module, struct symbol *sym)
203 {
204 	LLVMTypeRef ret = NULL;
205 
206 	/* don't cache the result for SYM_NODE */
207 	if (sym->type == SYM_NODE)
208 		return symbol_type(module, sym->ctype.base_type);
209 
210 	if (sym->aux)
211 		return sym->aux;
212 
213 	switch (sym->type) {
214 	case SYM_BITFIELD:
215 	case SYM_ENUM:
216 		ret = symbol_type(module, sym->ctype.base_type);
217 		break;
218 	case SYM_BASETYPE:
219 		ret = sym_basetype_type(sym);
220 		break;
221 	case SYM_PTR:
222 		ret = sym_ptr_type(module, sym);
223 		break;
224 	case SYM_UNION:
225 		ret = sym_union_type(module, sym);
226 		break;
227 	case SYM_STRUCT:
228 		ret = sym_struct_type(module, sym);
229 		break;
230 	case SYM_ARRAY:
231 		ret = sym_array_type(module, sym);
232 		break;
233 	case SYM_FN:
234 		ret = sym_func_type(module, sym);
235 		break;
236 	default:
237 		assert(0);
238 	}
239 
240 	/* cache the result */
241 	sym->aux = ret;
242 	return ret;
243 }
244 
245 static LLVMTypeRef int_type_by_size(int size)
246 {
247 	switch (size) {
248 		case 1:		return LLVMInt1Type();
249 		case 8:		return LLVMInt8Type();
250 		case 16:	return LLVMInt16Type();
251 		case 32:	return LLVMInt32Type();
252 		case 64:	return LLVMInt64Type();
253 
254 		default:
255 			die("invalid bit size %d", size);
256 			break;
257 	}
258 	return NULL;	/* not reached */
259 }
260 
261 static LLVMTypeRef insn_symbol_type(LLVMModuleRef module, struct instruction *insn)
262 {
263 	if (insn->type)
264 		return symbol_type(module, insn->type);
265 
266 	return int_type_by_size(insn->size);
267 }
268 
269 static LLVMLinkage data_linkage(struct symbol *sym)
270 {
271 	if (sym->ctype.modifiers & MOD_STATIC)
272 		return LLVMPrivateLinkage;
273 
274 	return LLVMExternalLinkage;
275 }
276 
277 static LLVMLinkage function_linkage(struct symbol *sym)
278 {
279 	if (sym->ctype.modifiers & MOD_STATIC)
280 		return LLVMInternalLinkage;
281 
282 	return LLVMExternalLinkage;
283 }
284 
285 #define MAX_PSEUDO_NAME 64
286 
287 static void pseudo_name(pseudo_t pseudo, char *buf)
288 {
289 	switch (pseudo->type) {
290 	case PSEUDO_REG:
291 		snprintf(buf, MAX_PSEUDO_NAME, "R%d", pseudo->nr);
292 		break;
293 	case PSEUDO_SYM:
294 		assert(0);
295 		break;
296 	case PSEUDO_VAL:
297 		assert(0);
298 		break;
299 	case PSEUDO_ARG: {
300 		assert(0);
301 		break;
302 	}
303 	case PSEUDO_PHI:
304 		snprintf(buf, MAX_PSEUDO_NAME, "PHI%d", pseudo->nr);
305 		break;
306 	default:
307 		assert(0);
308 	}
309 }
310 
311 static LLVMValueRef pseudo_to_value(struct function *fn, struct instruction *insn, pseudo_t pseudo)
312 {
313 	LLVMValueRef result = NULL;
314 
315 	switch (pseudo->type) {
316 	case PSEUDO_REG:
317 		result = pseudo->priv;
318 		break;
319 	case PSEUDO_SYM: {
320 		struct symbol *sym = pseudo->sym;
321 		struct expression *expr;
322 
323 		assert(sym->bb_target == NULL);
324 
325 		expr = sym->initializer;
326 		if (expr) {
327 			switch (expr->type) {
328 			case EXPR_STRING: {
329 				const char *s = expr->string->data;
330 				LLVMValueRef indices[] = { LLVMConstInt(LLVMInt64Type(), 0, 0), LLVMConstInt(LLVMInt64Type(), 0, 0) };
331 				LLVMValueRef data;
332 
333 				data = LLVMAddGlobal(fn->module, LLVMArrayType(LLVMInt8Type(), strlen(s) + 1), ".str");
334 				LLVMSetLinkage(data, LLVMPrivateLinkage);
335 				LLVMSetGlobalConstant(data, 1);
336 				LLVMSetInitializer(data, LLVMConstString(strdup(s), strlen(s) + 1, true));
337 
338 				result = LLVMConstGEP(data, indices, ARRAY_SIZE(indices));
339 				break;
340 			}
341 			case EXPR_SYMBOL: {
342 				struct symbol *sym = expr->symbol;
343 
344 				result = LLVMGetNamedGlobal(fn->module, show_ident(sym->ident));
345 				assert(result != NULL);
346 				break;
347 			}
348 			default:
349 				assert(0);
350 			}
351 		} else {
352 			const char *name = show_ident(sym->ident);
353 			LLVMTypeRef type = symbol_type(fn->module, sym);
354 
355 			if (LLVMGetTypeKind(type) == LLVMFunctionTypeKind) {
356 				result = LLVMGetNamedFunction(fn->module, name);
357 				if (!result)
358 					result = LLVMAddFunction(fn->module, name, type);
359 			} else {
360 				result = LLVMGetNamedGlobal(fn->module, name);
361 				if (!result)
362 					result = LLVMAddGlobal(fn->module, type, name);
363 			}
364 		}
365 		break;
366 	}
367 	case PSEUDO_VAL:
368 		result = LLVMConstInt(int_type_by_size(pseudo->size), pseudo->value, 1);
369 		break;
370 	case PSEUDO_ARG: {
371 		result = LLVMGetParam(fn->fn, pseudo->nr - 1);
372 		break;
373 	}
374 	case PSEUDO_PHI:
375 		result = pseudo->priv;
376 		break;
377 	case PSEUDO_VOID:
378 		result = NULL;
379 		break;
380 	default:
381 		assert(0);
382 	}
383 
384 	return result;
385 }
386 
387 static LLVMValueRef calc_gep(LLVMBuilderRef builder, LLVMValueRef base, LLVMValueRef off)
388 {
389 	LLVMTypeRef type = LLVMTypeOf(base);
390 	unsigned int as = LLVMGetPointerAddressSpace(type);
391 	LLVMTypeRef bytep = LLVMPointerType(LLVMInt8Type(), as);
392 	LLVMValueRef addr;
393 
394 	/* convert base to char* type */
395 	base = LLVMBuildPointerCast(builder, base, bytep, "");
396 	/* addr = base + off */
397 	addr = LLVMBuildInBoundsGEP(builder, base, &off, 1, "");
398 	/* convert back to the actual pointer type */
399 	addr = LLVMBuildPointerCast(builder, addr, type, "");
400 	return addr;
401 }
402 
403 static LLVMRealPredicate translate_fop(int opcode)
404 {
405 	static const LLVMRealPredicate trans_tbl[] = {
406 		[OP_SET_EQ]	= LLVMRealOEQ,
407 		[OP_SET_NE]	= LLVMRealUNE,
408 		[OP_SET_LE]	= LLVMRealOLE,
409 		[OP_SET_GE]	= LLVMRealOGE,
410 		[OP_SET_LT]	= LLVMRealOLT,
411 		[OP_SET_GT]	= LLVMRealOGT,
412 		/* Are these used with FP? */
413 		[OP_SET_B]	= LLVMRealOLT,
414 		[OP_SET_A]	= LLVMRealOGT,
415 		[OP_SET_BE]	= LLVMRealOLE,
416 		[OP_SET_AE]	= LLVMRealOGE,
417 	};
418 
419 	return trans_tbl[opcode];
420 }
421 
422 static LLVMIntPredicate translate_op(int opcode)
423 {
424 	static const LLVMIntPredicate trans_tbl[] = {
425 		[OP_SET_EQ]	= LLVMIntEQ,
426 		[OP_SET_NE]	= LLVMIntNE,
427 		[OP_SET_LE]	= LLVMIntSLE,
428 		[OP_SET_GE]	= LLVMIntSGE,
429 		[OP_SET_LT]	= LLVMIntSLT,
430 		[OP_SET_GT]	= LLVMIntSGT,
431 		[OP_SET_B]	= LLVMIntULT,
432 		[OP_SET_A]	= LLVMIntUGT,
433 		[OP_SET_BE]	= LLVMIntULE,
434 		[OP_SET_AE]	= LLVMIntUGE,
435 	};
436 
437 	return trans_tbl[opcode];
438 }
439 
440 static void output_op_binary(struct function *fn, struct instruction *insn)
441 {
442 	LLVMValueRef lhs, rhs, target;
443 	char target_name[64];
444 
445 	lhs = pseudo_to_value(fn, insn, insn->src1);
446 
447 	rhs = pseudo_to_value(fn, insn, insn->src2);
448 
449 	pseudo_name(insn->target, target_name);
450 
451 	switch (insn->opcode) {
452 	/* Binary */
453 	case OP_ADD:
454 		if (symbol_is_fp_type(insn->type))
455 			target = LLVMBuildFAdd(fn->builder, lhs, rhs, target_name);
456 		else
457 			target = LLVMBuildAdd(fn->builder, lhs, rhs, target_name);
458 		break;
459 	case OP_SUB:
460 		if (symbol_is_fp_type(insn->type))
461 			target = LLVMBuildFSub(fn->builder, lhs, rhs, target_name);
462 		else
463 			target = LLVMBuildSub(fn->builder, lhs, rhs, target_name);
464 		break;
465 	case OP_MULU:
466 		if (symbol_is_fp_type(insn->type))
467 			target = LLVMBuildFMul(fn->builder, lhs, rhs, target_name);
468 		else
469 			target = LLVMBuildMul(fn->builder, lhs, rhs, target_name);
470 		break;
471 	case OP_MULS:
472 		assert(!symbol_is_fp_type(insn->type));
473 		target = LLVMBuildMul(fn->builder, lhs, rhs, target_name);
474 		break;
475 	case OP_DIVU:
476 		if (symbol_is_fp_type(insn->type))
477 			target = LLVMBuildFDiv(fn->builder, lhs, rhs, target_name);
478 		else
479 			target = LLVMBuildUDiv(fn->builder, lhs, rhs, target_name);
480 		break;
481 	case OP_DIVS:
482 		assert(!symbol_is_fp_type(insn->type));
483 		target = LLVMBuildSDiv(fn->builder, lhs, rhs, target_name);
484 		break;
485 	case OP_MODU:
486 		assert(!symbol_is_fp_type(insn->type));
487 		target = LLVMBuildURem(fn->builder, lhs, rhs, target_name);
488 		break;
489 	case OP_MODS:
490 		assert(!symbol_is_fp_type(insn->type));
491 		target = LLVMBuildSRem(fn->builder, lhs, rhs, target_name);
492 		break;
493 	case OP_SHL:
494 		assert(!symbol_is_fp_type(insn->type));
495 		target = LLVMBuildShl(fn->builder, lhs, rhs, target_name);
496 		break;
497 	case OP_LSR:
498 		assert(!symbol_is_fp_type(insn->type));
499 		target = LLVMBuildLShr(fn->builder, lhs, rhs, target_name);
500 		break;
501 	case OP_ASR:
502 		assert(!symbol_is_fp_type(insn->type));
503 		target = LLVMBuildAShr(fn->builder, lhs, rhs, target_name);
504 		break;
505 
506 	/* Logical */
507 	case OP_AND:
508 		assert(!symbol_is_fp_type(insn->type));
509 		target = LLVMBuildAnd(fn->builder, lhs, rhs, target_name);
510 		break;
511 	case OP_OR:
512 		assert(!symbol_is_fp_type(insn->type));
513 		target = LLVMBuildOr(fn->builder, lhs, rhs, target_name);
514 		break;
515 	case OP_XOR:
516 		assert(!symbol_is_fp_type(insn->type));
517 		target = LLVMBuildXor(fn->builder, lhs, rhs, target_name);
518 		break;
519 	case OP_AND_BOOL: {
520 		LLVMValueRef lhs_nz, rhs_nz;
521 		LLVMTypeRef dst_type;
522 
523 		lhs_nz = LLVMBuildIsNotNull(fn->builder, lhs, "");
524 		rhs_nz = LLVMBuildIsNotNull(fn->builder, rhs, "");
525 		target = LLVMBuildAnd(fn->builder, lhs_nz, rhs_nz, target_name);
526 
527 		dst_type = insn_symbol_type(fn->module, insn);
528 		target = LLVMBuildZExt(fn->builder, target, dst_type, target_name);
529 		break;
530 	}
531 	case OP_OR_BOOL: {
532 		LLVMValueRef lhs_nz, rhs_nz;
533 		LLVMTypeRef dst_type;
534 
535 		lhs_nz = LLVMBuildIsNotNull(fn->builder, lhs, "");
536 		rhs_nz = LLVMBuildIsNotNull(fn->builder, rhs, "");
537 		target = LLVMBuildOr(fn->builder, lhs_nz, rhs_nz, target_name);
538 
539 		dst_type = insn_symbol_type(fn->module, insn);
540 		target = LLVMBuildZExt(fn->builder, target, dst_type, target_name);
541 		break;
542 	}
543 	default:
544 		assert(0);
545 		break;
546 	}
547 
548 	insn->target->priv = target;
549 }
550 
551 static void output_op_compare(struct function *fn, struct instruction *insn)
552 {
553 	LLVMValueRef lhs, rhs, target;
554 	char target_name[64];
555 
556 	lhs = pseudo_to_value(fn, insn, insn->src1);
557 
558 	if (insn->src2->type == PSEUDO_VAL)
559 		rhs = LLVMConstInt(LLVMTypeOf(lhs), insn->src2->value, 1);
560 	else
561 		rhs = pseudo_to_value(fn, insn, insn->src2);
562 
563 	pseudo_name(insn->target, target_name);
564 
565 	LLVMTypeRef dst_type = insn_symbol_type(fn->module, insn);
566 
567 	if (LLVMGetTypeKind(LLVMTypeOf(lhs)) == LLVMIntegerTypeKind) {
568 		LLVMIntPredicate op = translate_op(insn->opcode);
569 
570 		target = LLVMBuildICmp(fn->builder, op, lhs, rhs, target_name);
571 	} else {
572 		LLVMRealPredicate op = translate_fop(insn->opcode);
573 
574 		target = LLVMBuildFCmp(fn->builder, op, lhs, rhs, target_name);
575 	}
576 
577 	target = LLVMBuildZExt(fn->builder, target, dst_type, target_name);
578 
579 	insn->target->priv = target;
580 }
581 
582 static void output_op_ret(struct function *fn, struct instruction *insn)
583 {
584 	pseudo_t pseudo = insn->src;
585 
586 	if (pseudo && pseudo != VOID) {
587 		LLVMValueRef result = pseudo_to_value(fn, insn, pseudo);
588 
589 		LLVMBuildRet(fn->builder, result);
590 	} else
591 		LLVMBuildRetVoid(fn->builder);
592 }
593 
594 static LLVMValueRef calc_memop_addr(struct function *fn, struct instruction *insn)
595 {
596 	LLVMTypeRef int_type, addr_type;
597 	LLVMValueRef src, off, addr;
598 	unsigned int as;
599 
600 	/* int type large enough to hold a pointer */
601 	int_type = LLVMIntType(bits_in_pointer);
602 	off = LLVMConstInt(int_type, insn->offset, 0);
603 
604 	/* convert src to the effective pointer type */
605 	src = pseudo_to_value(fn, insn, insn->src);
606 	as = LLVMGetPointerAddressSpace(LLVMTypeOf(src));
607 	addr_type = LLVMPointerType(insn_symbol_type(fn->module, insn), as);
608 	src = LLVMBuildPointerCast(fn->builder, src, addr_type, "");
609 
610 	/* addr = src + off */
611 	addr = calc_gep(fn->builder, src, off);
612 	return addr;
613 }
614 
615 
616 static void output_op_load(struct function *fn, struct instruction *insn)
617 {
618 	LLVMValueRef addr, target;
619 
620 	addr = calc_memop_addr(fn, insn);
621 
622 	/* perform load */
623 	target = LLVMBuildLoad(fn->builder, addr, "load_target");
624 
625 	insn->target->priv = target;
626 }
627 
628 static void output_op_store(struct function *fn, struct instruction *insn)
629 {
630 	LLVMValueRef addr, target, target_in;
631 
632 	addr = calc_memop_addr(fn, insn);
633 
634 	target_in = pseudo_to_value(fn, insn, insn->target);
635 
636 	/* perform store */
637 	target = LLVMBuildStore(fn->builder, target_in, addr);
638 
639 	insn->target->priv = target;
640 }
641 
642 static LLVMValueRef bool_value(struct function *fn, LLVMValueRef value)
643 {
644 	if (LLVMTypeOf(value) != LLVMInt1Type())
645 		value = LLVMBuildIsNotNull(fn->builder, value, "cond");
646 
647 	return value;
648 }
649 
650 static void output_op_cbr(struct function *fn, struct instruction *br)
651 {
652 	LLVMValueRef cond = bool_value(fn,
653 			pseudo_to_value(fn, br, br->cond));
654 
655 	LLVMBuildCondBr(fn->builder, cond,
656 			br->bb_true->priv,
657 			br->bb_false->priv);
658 }
659 
660 static void output_op_br(struct function *fn, struct instruction *br)
661 {
662 	LLVMBuildBr(fn->builder, br->bb_true->priv);
663 }
664 
665 static void output_op_sel(struct function *fn, struct instruction *insn)
666 {
667 	LLVMValueRef target, src1, src2, src3;
668 
669 	src1 = bool_value(fn, pseudo_to_value(fn, insn, insn->src1));
670 	src2 = pseudo_to_value(fn, insn, insn->src2);
671 	src3 = pseudo_to_value(fn, insn, insn->src3);
672 
673 	target = LLVMBuildSelect(fn->builder, src1, src2, src3, "select");
674 
675 	insn->target->priv = target;
676 }
677 
678 static void output_op_switch(struct function *fn, struct instruction *insn)
679 {
680 	LLVMValueRef sw_val, target;
681 	struct basic_block *def = NULL;
682 	struct multijmp *jmp;
683 	int n_jmp = 0;
684 
685 	FOR_EACH_PTR(insn->multijmp_list, jmp) {
686 		if (jmp->begin == jmp->end) {		/* case N */
687 			n_jmp++;
688 		} else if (jmp->begin < jmp->end) {	/* case M..N */
689 			assert(0);
690 		} else					/* default case */
691 			def = jmp->target;
692 	} END_FOR_EACH_PTR(jmp);
693 
694 	sw_val = pseudo_to_value(fn, insn, insn->target);
695 	target = LLVMBuildSwitch(fn->builder, sw_val,
696 				 def ? def->priv : NULL, n_jmp);
697 
698 	FOR_EACH_PTR(insn->multijmp_list, jmp) {
699 		if (jmp->begin == jmp->end) {		/* case N */
700 			LLVMAddCase(target,
701 				LLVMConstInt(LLVMInt32Type(), jmp->begin, 0),
702 				jmp->target->priv);
703 		} else if (jmp->begin < jmp->end) {	/* case M..N */
704 			assert(0);
705 		}
706 	} END_FOR_EACH_PTR(jmp);
707 
708 	insn->target->priv = target;
709 }
710 
711 static void output_op_call(struct function *fn, struct instruction *insn)
712 {
713 	LLVMValueRef target, func;
714 	int n_arg = 0, i;
715 	struct pseudo *arg;
716 	LLVMValueRef *args;
717 
718 	FOR_EACH_PTR(insn->arguments, arg) {
719 		n_arg++;
720 	} END_FOR_EACH_PTR(arg);
721 
722 	args = calloc(n_arg, sizeof(LLVMValueRef));
723 
724 	i = 0;
725 	FOR_EACH_PTR(insn->arguments, arg) {
726 		args[i++] = pseudo_to_value(fn, insn, arg);
727 	} END_FOR_EACH_PTR(arg);
728 
729 	func = pseudo_to_value(fn, insn, insn->func);
730 	target = LLVMBuildCall(fn->builder, func, args, n_arg, "");
731 
732 	insn->target->priv = target;
733 }
734 
735 static void output_op_phisrc(struct function *fn, struct instruction *insn)
736 {
737 	LLVMValueRef v;
738 	struct instruction *phi;
739 
740 	assert(insn->target->priv == NULL);
741 
742 	/* target = src */
743 	v = pseudo_to_value(fn, insn, insn->phi_src);
744 
745 	FOR_EACH_PTR(insn->phi_users, phi) {
746 		LLVMValueRef load, ptr;
747 
748 		assert(phi->opcode == OP_PHI);
749 		/* phi must be load from alloca */
750 		load = phi->target->priv;
751 		assert(LLVMGetInstructionOpcode(load) == LLVMLoad);
752 		ptr = LLVMGetOperand(load, 0);
753 		/* store v to alloca */
754 		LLVMBuildStore(fn->builder, v, ptr);
755 	} END_FOR_EACH_PTR(phi);
756 }
757 
758 static void output_op_phi(struct function *fn, struct instruction *insn)
759 {
760 	LLVMValueRef load = insn->target->priv;
761 
762 	/* forward load */
763 	assert(LLVMGetInstructionOpcode(load) == LLVMLoad);
764 	/* forward load has no parent block */
765 	assert(!LLVMGetInstructionParent(load));
766 	/* finalize load in current block  */
767 	LLVMInsertIntoBuilder(fn->builder, load);
768 }
769 
770 static void output_op_ptrcast(struct function *fn, struct instruction *insn)
771 {
772 	LLVMValueRef src, target;
773 	char target_name[64];
774 
775 	src = insn->src->priv;
776 	if (!src)
777 		src = pseudo_to_value(fn, insn, insn->src);
778 
779 	pseudo_name(insn->target, target_name);
780 
781 	assert(!symbol_is_fp_type(insn->type));
782 
783 	target = LLVMBuildBitCast(fn->builder, src, insn_symbol_type(fn->module, insn), target_name);
784 
785 	insn->target->priv = target;
786 }
787 
788 static void output_op_cast(struct function *fn, struct instruction *insn, LLVMOpcode op)
789 {
790 	LLVMValueRef src, target;
791 	char target_name[64];
792 
793 	src = insn->src->priv;
794 	if (!src)
795 		src = pseudo_to_value(fn, insn, insn->src);
796 
797 	pseudo_name(insn->target, target_name);
798 
799 	assert(!symbol_is_fp_type(insn->type));
800 
801 	if (insn->size < LLVMGetIntTypeWidth(LLVMTypeOf(src)))
802 		target = LLVMBuildTrunc(fn->builder, src, insn_symbol_type(fn->module, insn), target_name);
803 	else
804 		target = LLVMBuildCast(fn->builder, op, src, insn_symbol_type(fn->module, insn), target_name);
805 
806 	insn->target->priv = target;
807 }
808 
809 static void output_insn(struct function *fn, struct instruction *insn)
810 {
811 	switch (insn->opcode) {
812 	case OP_RET:
813 		output_op_ret(fn, insn);
814 		break;
815 	case OP_BR:
816 		output_op_br(fn, insn);
817 		break;
818 	case OP_CBR:
819 		output_op_cbr(fn, insn);
820 		break;
821 	case OP_SYMADDR:
822 		assert(0);
823 		break;
824 	case OP_SETVAL:
825 		assert(0);
826 		break;
827 	case OP_SWITCH:
828 		output_op_switch(fn, insn);
829 		break;
830 	case OP_COMPUTEDGOTO:
831 		assert(0);
832 		break;
833 	case OP_PHISOURCE:
834 		output_op_phisrc(fn, insn);
835 		break;
836 	case OP_PHI:
837 		output_op_phi(fn, insn);
838 		break;
839 	case OP_LOAD:
840 		output_op_load(fn, insn);
841 		break;
842 	case OP_LNOP:
843 		assert(0);
844 		break;
845 	case OP_STORE:
846 		output_op_store(fn, insn);
847 		break;
848 	case OP_SNOP:
849 		assert(0);
850 		break;
851 	case OP_INLINED_CALL:
852 		assert(0);
853 		break;
854 	case OP_CALL:
855 		output_op_call(fn, insn);
856 		break;
857 	case OP_CAST:
858 		output_op_cast(fn, insn, LLVMZExt);
859 		break;
860 	case OP_SCAST:
861 		output_op_cast(fn, insn, LLVMSExt);
862 		break;
863 	case OP_FPCAST:
864 		assert(0);
865 		break;
866 	case OP_PTRCAST:
867 		output_op_ptrcast(fn, insn);
868 		break;
869 	case OP_BINARY ... OP_BINARY_END:
870 		output_op_binary(fn, insn);
871 		break;
872 	case OP_BINCMP ... OP_BINCMP_END:
873 		output_op_compare(fn, insn);
874 		break;
875 	case OP_SEL:
876 		output_op_sel(fn, insn);
877 		break;
878 	case OP_SLICE:
879 		assert(0);
880 		break;
881 	case OP_NOT: {
882 		LLVMValueRef src, target;
883 		char target_name[64];
884 
885 		src = pseudo_to_value(fn, insn, insn->src);
886 
887 		pseudo_name(insn->target, target_name);
888 
889 		target = LLVMBuildNot(fn->builder, src, target_name);
890 
891 		insn->target->priv = target;
892 		break;
893 	}
894 	case OP_NEG:
895 		assert(0);
896 		break;
897 	case OP_CONTEXT:
898 		assert(0);
899 		break;
900 	case OP_RANGE:
901 		assert(0);
902 		break;
903 	case OP_NOP:
904 		assert(0);
905 		break;
906 	case OP_DEATHNOTE:
907 		break;
908 	case OP_ASM:
909 		assert(0);
910 		break;
911 	case OP_COPY:
912 		assert(0);
913 		break;
914 	default:
915 		break;
916 	}
917 }
918 
919 static void output_bb(struct function *fn, struct basic_block *bb, unsigned long generation)
920 {
921 	struct instruction *insn;
922 
923 	bb->generation = generation;
924 
925 	FOR_EACH_PTR(bb->insns, insn) {
926 		if (!insn->bb)
927 			continue;
928 
929 		output_insn(fn, insn);
930 	}
931 	END_FOR_EACH_PTR(insn);
932 }
933 
934 #define MAX_ARGS	64
935 
936 static void output_fn(LLVMModuleRef module, struct entrypoint *ep)
937 {
938 	unsigned long generation = ++bb_generation;
939 	struct symbol *sym = ep->name;
940 	struct symbol *base_type = sym->ctype.base_type;
941 	struct symbol *ret_type = sym->ctype.base_type->ctype.base_type;
942 	LLVMTypeRef arg_types[MAX_ARGS];
943 	LLVMTypeRef return_type;
944 	struct function function = { .module = module };
945 	struct basic_block *bb;
946 	struct symbol *arg;
947 	const char *name;
948 	int nr_args = 0;
949 
950 	FOR_EACH_PTR(base_type->arguments, arg) {
951 		struct symbol *arg_base_type = arg->ctype.base_type;
952 
953 		arg_types[nr_args++] = symbol_type(module, arg_base_type);
954 	} END_FOR_EACH_PTR(arg);
955 
956 	name = show_ident(sym->ident);
957 
958 	return_type = symbol_type(module, ret_type);
959 
960 	function.type = LLVMFunctionType(return_type, arg_types, nr_args, 0);
961 
962 	function.fn = LLVMAddFunction(module, name, function.type);
963 	LLVMSetFunctionCallConv(function.fn, LLVMCCallConv);
964 
965 	LLVMSetLinkage(function.fn, function_linkage(sym));
966 
967 	function.builder = LLVMCreateBuilder();
968 
969 	static int nr_bb;
970 
971 	FOR_EACH_PTR(ep->bbs, bb) {
972 		if (bb->generation == generation)
973 			continue;
974 
975 		LLVMBasicBlockRef bbr;
976 		char bbname[32];
977 		struct instruction *insn;
978 
979 		sprintf(bbname, "L%d", nr_bb++);
980 		bbr = LLVMAppendBasicBlock(function.fn, bbname);
981 
982 		bb->priv = bbr;
983 
984 		/* allocate alloca for each phi */
985 		FOR_EACH_PTR(bb->insns, insn) {
986 			LLVMBasicBlockRef entrybbr;
987 			LLVMTypeRef phi_type;
988 			LLVMValueRef ptr;
989 
990 			if (!insn->bb || insn->opcode != OP_PHI)
991 				continue;
992 			/* insert alloca into entry block */
993 			entrybbr = LLVMGetEntryBasicBlock(function.fn);
994 			LLVMPositionBuilderAtEnd(function.builder, entrybbr);
995 			phi_type = insn_symbol_type(module, insn);
996 			ptr = LLVMBuildAlloca(function.builder, phi_type, "");
997 			/* emit forward load for phi */
998 			LLVMClearInsertionPosition(function.builder);
999 			insn->target->priv = LLVMBuildLoad(function.builder, ptr, "phi");
1000 		} END_FOR_EACH_PTR(insn);
1001 	}
1002 	END_FOR_EACH_PTR(bb);
1003 
1004 	FOR_EACH_PTR(ep->bbs, bb) {
1005 		if (bb->generation == generation)
1006 			continue;
1007 
1008 		LLVMPositionBuilderAtEnd(function.builder, bb->priv);
1009 
1010 		output_bb(&function, bb, generation);
1011 	}
1012 	END_FOR_EACH_PTR(bb);
1013 }
1014 
1015 static LLVMValueRef output_data(LLVMModuleRef module, struct symbol *sym)
1016 {
1017 	struct expression *initializer = sym->initializer;
1018 	LLVMValueRef initial_value;
1019 	LLVMValueRef data;
1020 	const char *name;
1021 
1022 	if (initializer) {
1023 		switch (initializer->type) {
1024 		case EXPR_VALUE:
1025 			initial_value = LLVMConstInt(symbol_type(module, sym), initializer->value, 1);
1026 			break;
1027 		case EXPR_SYMBOL: {
1028 			struct symbol *sym = initializer->symbol;
1029 
1030 			initial_value = LLVMGetNamedGlobal(module, show_ident(sym->ident));
1031 			if (!initial_value)
1032 				initial_value = output_data(module, sym);
1033 			break;
1034 		}
1035 		case EXPR_STRING: {
1036 			const char *s = initializer->string->data;
1037 
1038 			initial_value = LLVMConstString(strdup(s), strlen(s) + 1, true);
1039 			break;
1040 		}
1041 		default:
1042 			assert(0);
1043 		}
1044 	} else {
1045 		LLVMTypeRef type = symbol_type(module, sym);
1046 
1047 		initial_value = LLVMConstNull(type);
1048 	}
1049 
1050 	name = show_ident(sym->ident);
1051 
1052 	data = LLVMAddGlobal(module, LLVMTypeOf(initial_value), name);
1053 
1054 	LLVMSetLinkage(data, data_linkage(sym));
1055 	if (sym->ctype.modifiers & MOD_CONST)
1056 		LLVMSetGlobalConstant(data, 1);
1057 	if (sym->ctype.modifiers & MOD_TLS)
1058 		LLVMSetThreadLocal(data, 1);
1059 	if (sym->ctype.alignment)
1060 		LLVMSetAlignment(data, sym->ctype.alignment);
1061 
1062 	if (!(sym->ctype.modifiers & MOD_EXTERN))
1063 		LLVMSetInitializer(data, initial_value);
1064 
1065 	return data;
1066 }
1067 
1068 static int is_prototype(struct symbol *sym)
1069 {
1070 	if (sym->type == SYM_NODE)
1071 		sym = sym->ctype.base_type;
1072 	return sym && sym->type == SYM_FN && !sym->stmt;
1073 }
1074 
1075 static int compile(LLVMModuleRef module, struct symbol_list *list)
1076 {
1077 	struct symbol *sym;
1078 
1079 	FOR_EACH_PTR(list, sym) {
1080 		struct entrypoint *ep;
1081 		expand_symbol(sym);
1082 
1083 		if (is_prototype(sym))
1084 			continue;
1085 
1086 		ep = linearize_symbol(sym);
1087 		if (ep)
1088 			output_fn(module, ep);
1089 		else
1090 			output_data(module, sym);
1091 	}
1092 	END_FOR_EACH_PTR(sym);
1093 
1094 	return 0;
1095 }
1096 
1097 #ifndef LLVM_DEFAULT_TARGET_TRIPLE
1098 #define LLVM_DEFAULT_TARGET_TRIPLE LLVM_HOSTTRIPLE
1099 #endif
1100 
1101 #define X86_LINUX_LAYOUT \
1102 	"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-" \
1103 	"i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-" \
1104 	"a0:0:64-f80:32:32-n8:16:32-S128"
1105 
1106 #define X86_64_LINUX_LAYOUT \
1107 	"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-" \
1108 	"i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-" \
1109 	"a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
1110 
1111 static void set_target(LLVMModuleRef module)
1112 {
1113 	char target[] = LLVM_DEFAULT_TARGET_TRIPLE;
1114 	const char *arch, *vendor, *os, *env, *layout = NULL;
1115 	char triple[256];
1116 
1117 	arch = strtok(target, "-");
1118 	vendor = strtok(NULL, "-");
1119 	os = strtok(NULL, "-");
1120 	env = strtok(NULL, "-");
1121 
1122 	if (!os)
1123 		return;
1124 	if (!env)
1125 		env = "unknown";
1126 
1127 	if (!strcmp(arch, "x86_64") && !strcmp(os, "linux")) {
1128 		if (arch_m64) {
1129 			layout = X86_64_LINUX_LAYOUT;
1130 		} else {
1131 			arch = "i386";
1132 			layout = X86_LINUX_LAYOUT;
1133 		}
1134 	}
1135 
1136 	/* unsupported target */
1137 	if (!layout)
1138 		return;
1139 
1140 	snprintf(triple, sizeof(triple), "%s-%s-%s-%s", arch, vendor, os, env);
1141 	LLVMSetTarget(module, triple);
1142 	LLVMSetDataLayout(module, layout);
1143 }
1144 
1145 int main(int argc, char **argv)
1146 {
1147 	struct string_list *filelist = NULL;
1148 	struct symbol_list *symlist;
1149 	LLVMModuleRef module;
1150 	char *file;
1151 
1152 	symlist = sparse_initialize(argc, argv, &filelist);
1153 
1154 	module = LLVMModuleCreateWithName("sparse");
1155 	set_target(module);
1156 
1157 	compile(module, symlist);
1158 
1159 	/* need ->phi_users */
1160 	dbg_dead = 1;
1161 	FOR_EACH_PTR_NOTAG(filelist, file) {
1162 		symlist = sparse(file);
1163 		if (die_if_error)
1164 			return 1;
1165 		compile(module, symlist);
1166 	} END_FOR_EACH_PTR_NOTAG(file);
1167 
1168 	LLVMVerifyModule(module, LLVMPrintMessageAction, NULL);
1169 
1170 	LLVMWriteBitcodeToFD(module, STDOUT_FILENO, 0, 0);
1171 
1172 	LLVMDisposeModule(module);
1173 
1174 	report_stats();
1175 	return 0;
1176 }
1177