xref: /illumos-gate/usr/src/lib/libdtrace/common/dt_cg.c (revision 54d82594cac34899a52710db0b8235a171e83e31)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/isa_defs.h>
32 
33 #include <strings.h>
34 #include <stdlib.h>
35 #include <setjmp.h>
36 #include <assert.h>
37 #include <errno.h>
38 
39 #include <dt_impl.h>
40 #include <dt_grammar.h>
41 #include <dt_parser.h>
42 #include <dt_provider.h>
43 
44 static void dt_cg_node(dt_node_t *, dt_irlist_t *, dt_regset_t *);
45 
46 static dt_irnode_t *
47 dt_cg_node_alloc(uint_t label, dif_instr_t instr)
48 {
49 	dt_irnode_t *dip = malloc(sizeof (dt_irnode_t));
50 
51 	if (dip == NULL)
52 		longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
53 
54 	dip->di_label = label;
55 	dip->di_instr = instr;
56 	dip->di_extern = NULL;
57 	dip->di_next = NULL;
58 
59 	return (dip);
60 }
61 
62 /*
63  * Code generator wrapper function for ctf_member_info.  If we are given a
64  * reference to a forward declaration tag, search the entire type space for
65  * the actual definition and then call ctf_member_info on the result.
66  */
67 static ctf_file_t *
68 dt_cg_membinfo(ctf_file_t *fp, ctf_id_t type, const char *s, ctf_membinfo_t *mp)
69 {
70 	while (ctf_type_kind(fp, type) == CTF_K_FORWARD) {
71 		char n[DT_TYPE_NAMELEN];
72 		dtrace_typeinfo_t dtt;
73 
74 		if (ctf_type_name(fp, type, n, sizeof (n)) == NULL ||
75 		    dt_type_lookup(n, &dtt) == -1 || (
76 		    dtt.dtt_ctfp == fp && dtt.dtt_type == type))
77 			break; /* unable to improve our position */
78 
79 		fp = dtt.dtt_ctfp;
80 		type = ctf_type_resolve(fp, dtt.dtt_type);
81 	}
82 
83 	if (ctf_member_info(fp, type, s, mp) == CTF_ERR)
84 		return (NULL); /* ctf_errno is set for us */
85 
86 	return (fp);
87 }
88 
89 static void
90 dt_cg_xsetx(dt_irlist_t *dlp, dt_ident_t *idp, uint_t lbl, int reg, uint64_t x)
91 {
92 	int flag = idp != NULL ? DT_INT_PRIVATE : DT_INT_SHARED;
93 	int intoff = dt_inttab_insert(yypcb->pcb_inttab, x, flag);
94 	dif_instr_t instr = DIF_INSTR_SETX((uint_t)intoff, reg);
95 
96 	if (intoff == -1)
97 		longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
98 
99 	if (intoff > DIF_INTOFF_MAX)
100 		longjmp(yypcb->pcb_jmpbuf, EDT_INT2BIG);
101 
102 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl, instr));
103 
104 	if (idp != NULL)
105 		dlp->dl_last->di_extern = idp;
106 }
107 
108 static void
109 dt_cg_setx(dt_irlist_t *dlp, int reg, uint64_t x)
110 {
111 	dt_cg_xsetx(dlp, NULL, DT_LBL_NONE, reg, x);
112 }
113 
/*
 * When loading bit-fields, we want to convert a byte count in the range
 * 1-8 to the closest power of 2 (e.g. 3->4, 5->8, etc).  The clp2() function
 * is a clever implementation from "Hacker's Delight" by Henry Warren, Jr.
 *
 * The function returns the smallest power of two that is greater than or
 * equal to x; a value that is already a power of two is returned unchanged.
 * It works by subtracting one and then copying the highest set bit into
 * every lower bit position, so that adding one back carries into the next
 * power of two.  An argument of zero yields zero.
 */
static size_t
clp2(size_t x)
{
	x--;

	x |= (x >> 1);
	x |= (x >> 2);
	x |= (x >> 4);
	x |= (x >> 8);
	x |= (x >> 16);

	/*
	 * The five steps above only smear across 32 bits.  When size_t is
	 * wider than that, one more step is needed or inputs above 2^32 would
	 * produce a value that is not a power of two.  The shift is written
	 * as two 16-bit shifts so that the expression stays well-defined
	 * (and warning-free) when size_t is only 32 bits wide and this branch
	 * is dead code.
	 */
	if (sizeof (x) > sizeof (uint32_t))
		x |= ((x >> 16) >> 16);

	return (x + 1);
}
132 
133 /*
134  * Lookup the correct load opcode to use for the specified node and CTF type.
135  * We determine the size and convert it to a 3-bit index.  Our lookup table
136  * is constructed to use a 5-bit index, consisting of the 3-bit size 0-7, a
137  * bit for the sign, and a bit for userland address.  For example, a 4-byte
138  * signed load from userland would be at the following table index:
139  * user=1 sign=1 size=4 => binary index 11011 = decimal index 27
140  */
141 static uint_t
142 dt_cg_load(dt_node_t *dnp, ctf_file_t *ctfp, ctf_id_t type)
143 {
144 	static const uint_t ops[] = {
145 		DIF_OP_LDUB,	DIF_OP_LDUH,	0,	DIF_OP_LDUW,
146 		0,		0,		0,	DIF_OP_LDX,
147 		DIF_OP_LDSB,	DIF_OP_LDSH,	0,	DIF_OP_LDSW,
148 		0,		0,		0,	DIF_OP_LDX,
149 		DIF_OP_ULDUB,	DIF_OP_ULDUH,	0,	DIF_OP_ULDUW,
150 		0,		0,		0,	DIF_OP_ULDX,
151 		DIF_OP_ULDSB,	DIF_OP_ULDSH,	0,	DIF_OP_ULDSW,
152 		0,		0,		0,	DIF_OP_ULDX,
153 	};
154 
155 	ctf_encoding_t e;
156 	ssize_t size;
157 
158 	/*
159 	 * If we're loading a bit-field, the size of our load is found by
160 	 * rounding cte_bits up to a byte boundary and then finding the
161 	 * nearest power of two to this value (see clp2(), above).
162 	 */
163 	if ((dnp->dn_flags & DT_NF_BITFIELD) &&
164 	    ctf_type_encoding(ctfp, type, &e) != CTF_ERR)
165 		size = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY);
166 	else
167 		size = ctf_type_size(ctfp, type);
168 
169 	if (size < 1 || size > 8 || (size & (size - 1)) != 0) {
170 		xyerror(D_UNKNOWN, "internal error -- cg cannot load "
171 		    "size %ld when passed by value\n", (long)size);
172 	}
173 
174 	size--; /* convert size to 3-bit index */
175 
176 	if (dnp->dn_flags & DT_NF_SIGNED)
177 		size |= 0x08;
178 	if (dnp->dn_flags & DT_NF_USERLAND)
179 		size |= 0x10;
180 
181 	return (ops[size]);
182 }
183 
184 static void
185 dt_cg_ptrsize(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
186     uint_t op, int dreg)
187 {
188 	ctf_file_t *ctfp = dnp->dn_ctfp;
189 	ctf_arinfo_t r;
190 	dif_instr_t instr;
191 	ctf_id_t type;
192 	uint_t kind;
193 	ssize_t size;
194 	int sreg;
195 
196 	if ((sreg = dt_regset_alloc(drp)) == -1)
197 		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
198 
199 	type = ctf_type_resolve(ctfp, dnp->dn_type);
200 	kind = ctf_type_kind(ctfp, type);
201 	assert(kind == CTF_K_POINTER || kind == CTF_K_ARRAY);
202 
203 	if (kind == CTF_K_ARRAY) {
204 		if (ctf_array_info(ctfp, type, &r) != 0) {
205 			yypcb->pcb_hdl->dt_ctferr = ctf_errno(ctfp);
206 			longjmp(yypcb->pcb_jmpbuf, EDT_CTF);
207 		}
208 		type = r.ctr_contents;
209 	} else
210 		type = ctf_type_reference(ctfp, type);
211 
212 	if ((size = ctf_type_size(ctfp, type)) == 1)
213 		return; /* multiply or divide by one can be omitted */
214 
215 	dt_cg_setx(dlp, sreg, size);
216 	instr = DIF_INSTR_FMT(op, dreg, sreg, dreg);
217 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
218 	dt_regset_free(drp, sreg);
219 }
220 
221 /*
222  * If the result of a "." or "->" operation is a bit-field, we use this routine
223  * to generate an epilogue to the load instruction that extracts the value.  In
224  * the diagrams below the "ld??" is the load instruction that is generated to
225  * load the containing word that is generating prior to calling this function.
226  *
227  * Epilogue for unsigned fields:	Epilogue for signed fields:
228  *
229  * ldu?	[r1], r1			lds? [r1], r1
230  * setx	USHIFT, r2			setx 64 - SSHIFT, r2
231  * srl	r1, r2, r1			sll  r1, r2, r1
232  * setx	(1 << bits) - 1, r2		setx 64 - bits, r2
233  * and	r1, r2, r1			sra  r1, r2, r1
234  *
235  * The *SHIFT constants above changes value depending on the endian-ness of our
236  * target architecture.  Refer to the comments below for more details.
237  */
238 static void
239 dt_cg_field_get(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
240     ctf_file_t *fp, const ctf_membinfo_t *mp)
241 {
242 	ctf_encoding_t e;
243 	dif_instr_t instr;
244 	uint64_t shift;
245 	int r1, r2;
246 
247 	if (ctf_type_encoding(fp, mp->ctm_type, &e) != 0 || e.cte_bits > 64) {
248 		xyerror(D_UNKNOWN, "cg: bad field: off %lu type <%ld> "
249 		    "bits %u\n", mp->ctm_offset, mp->ctm_type, e.cte_bits);
250 	}
251 
252 	assert(dnp->dn_op == DT_TOK_PTR || dnp->dn_op == DT_TOK_DOT);
253 	r1 = dnp->dn_left->dn_reg;
254 
255 	if ((r2 = dt_regset_alloc(drp)) == -1)
256 		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
257 
258 	/*
259 	 * On little-endian architectures, ctm_offset counts from the right so
260 	 * ctm_offset % NBBY itself is the amount we want to shift right to
261 	 * move the value bits to the little end of the register to mask them.
262 	 * On big-endian architectures, ctm_offset counts from the left so we
263 	 * must subtract (ctm_offset % NBBY + cte_bits) from the size in bits
264 	 * we used for the load.  The size of our load in turn is found by
265 	 * rounding cte_bits up to a byte boundary and then finding the
266 	 * nearest power of two to this value (see clp2(), above).  These
267 	 * properties are used to compute shift as USHIFT or SSHIFT, below.
268 	 */
269 	if (dnp->dn_flags & DT_NF_SIGNED) {
270 #ifdef _BIG_ENDIAN
271 		shift = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY) * NBBY -
272 		    mp->ctm_offset % NBBY;
273 #else
274 		shift = mp->ctm_offset % NBBY + e.cte_bits;
275 #endif
276 		dt_cg_setx(dlp, r2, 64 - shift);
277 		instr = DIF_INSTR_FMT(DIF_OP_SLL, r1, r2, r1);
278 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
279 
280 		dt_cg_setx(dlp, r2, 64 - e.cte_bits);
281 		instr = DIF_INSTR_FMT(DIF_OP_SRA, r1, r2, r1);
282 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
283 	} else {
284 #ifdef _BIG_ENDIAN
285 		shift = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY) * NBBY -
286 		    (mp->ctm_offset % NBBY + e.cte_bits);
287 #else
288 		shift = mp->ctm_offset % NBBY;
289 #endif
290 		dt_cg_setx(dlp, r2, shift);
291 		instr = DIF_INSTR_FMT(DIF_OP_SRL, r1, r2, r1);
292 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
293 
294 		dt_cg_setx(dlp, r2, (1ULL << e.cte_bits) - 1);
295 		instr = DIF_INSTR_FMT(DIF_OP_AND, r1, r2, r1);
296 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
297 	}
298 
299 	dt_regset_free(drp, r2);
300 }
301 
302 /*
303  * If the destination of a store operation is a bit-field, we use this routine
304  * to generate a prologue to the store instruction that loads the surrounding
305  * bits, clears the destination field, and ORs in the new value of the field.
306  * In the diagram below the "st?" is the store instruction that is generated to
307  * store the containing word that is generating after calling this function.
308  *
309  * ld	[dst->dn_reg], r1
310  * setx	~(((1 << cte_bits) - 1) << (ctm_offset % NBBY)), r2
311  * and	r1, r2, r1
312  *
313  * setx	(1 << cte_bits) - 1, r2
314  * and	src->dn_reg, r2, r2
315  * setx ctm_offset % NBBY, r3
316  * sll	r2, r3, r2
317  *
318  * or	r1, r2, r1
319  * st?	r1, [dst->dn_reg]
320  *
321  * This routine allocates a new register to hold the value to be stored and
322  * returns it.  The caller is responsible for freeing this register later.
323  */
324 static int
325 dt_cg_field_set(dt_node_t *src, dt_irlist_t *dlp,
326     dt_regset_t *drp, dt_node_t *dst)
327 {
328 	uint64_t cmask, fmask, shift;
329 	dif_instr_t instr;
330 	int r1, r2, r3;
331 
332 	ctf_membinfo_t m;
333 	ctf_encoding_t e;
334 	ctf_file_t *fp, *ofp;
335 	ctf_id_t type;
336 
337 	assert(dst->dn_op == DT_TOK_PTR || dst->dn_op == DT_TOK_DOT);
338 	assert(dst->dn_right->dn_kind == DT_NODE_IDENT);
339 
340 	fp = dst->dn_left->dn_ctfp;
341 	type = ctf_type_resolve(fp, dst->dn_left->dn_type);
342 
343 	if (dst->dn_op == DT_TOK_PTR) {
344 		type = ctf_type_reference(fp, type);
345 		type = ctf_type_resolve(fp, type);
346 	}
347 
348 	if ((fp = dt_cg_membinfo(ofp = fp, type,
349 	    dst->dn_right->dn_string, &m)) == NULL) {
350 		yypcb->pcb_hdl->dt_ctferr = ctf_errno(ofp);
351 		longjmp(yypcb->pcb_jmpbuf, EDT_CTF);
352 	}
353 
354 	if (ctf_type_encoding(fp, m.ctm_type, &e) != 0 || e.cte_bits > 64) {
355 		xyerror(D_UNKNOWN, "cg: bad field: off %lu type <%ld> "
356 		    "bits %u\n", m.ctm_offset, m.ctm_type, e.cte_bits);
357 	}
358 
359 	if ((r1 = dt_regset_alloc(drp)) == -1 ||
360 	    (r2 = dt_regset_alloc(drp)) == -1 ||
361 	    (r3 = dt_regset_alloc(drp)) == -1)
362 		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
363 
364 	/*
365 	 * Compute shifts and masks.  We need to compute "shift" as the amount
366 	 * we need to shift left to position our field in the containing word.
367 	 * Refer to the comments in dt_cg_field_get(), above, for more info.
368 	 * We then compute fmask as the mask that truncates the value in the
369 	 * input register to width cte_bits, and cmask as the mask used to
370 	 * pass through the containing bits and zero the field bits.
371 	 */
372 #ifdef _BIG_ENDIAN
373 	shift = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY) * NBBY -
374 	    (m.ctm_offset % NBBY + e.cte_bits);
375 #else
376 	shift = m.ctm_offset % NBBY;
377 #endif
378 	fmask = (1ULL << e.cte_bits) - 1;
379 	cmask = ~(fmask << shift);
380 
381 	instr = DIF_INSTR_LOAD(
382 	    dt_cg_load(dst, fp, m.ctm_type), dst->dn_reg, r1);
383 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
384 
385 	dt_cg_setx(dlp, r2, cmask);
386 	instr = DIF_INSTR_FMT(DIF_OP_AND, r1, r2, r1);
387 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
388 
389 	dt_cg_setx(dlp, r2, fmask);
390 	instr = DIF_INSTR_FMT(DIF_OP_AND, src->dn_reg, r2, r2);
391 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
392 
393 	dt_cg_setx(dlp, r3, shift);
394 	instr = DIF_INSTR_FMT(DIF_OP_SLL, r2, r3, r2);
395 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
396 
397 	instr = DIF_INSTR_FMT(DIF_OP_OR, r1, r2, r1);
398 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
399 
400 	dt_regset_free(drp, r3);
401 	dt_regset_free(drp, r2);
402 
403 	return (r1);
404 }
405 
406 static void
407 dt_cg_store(dt_node_t *src, dt_irlist_t *dlp, dt_regset_t *drp, dt_node_t *dst)
408 {
409 	ctf_encoding_t e;
410 	dif_instr_t instr;
411 	size_t size;
412 	int reg;
413 
414 	/*
415 	 * If we're loading a bit-field, the size of our store is found by
416 	 * rounding dst's cte_bits up to a byte boundary and then finding the
417 	 * nearest power of two to this value (see clp2(), above).
418 	 */
419 	if ((dst->dn_flags & DT_NF_BITFIELD) &&
420 	    ctf_type_encoding(dst->dn_ctfp, dst->dn_type, &e) != CTF_ERR)
421 		size = clp2(P2ROUNDUP(e.cte_bits, NBBY) / NBBY);
422 	else
423 		size = dt_node_type_size(src);
424 
425 	if (src->dn_flags & DT_NF_REF) {
426 		if ((reg = dt_regset_alloc(drp)) == -1)
427 			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
428 		dt_cg_setx(dlp, reg, size);
429 		instr = DIF_INSTR_COPYS(src->dn_reg, reg, dst->dn_reg);
430 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
431 		dt_regset_free(drp, reg);
432 	} else {
433 		if (dst->dn_flags & DT_NF_BITFIELD)
434 			reg = dt_cg_field_set(src, dlp, drp, dst);
435 		else
436 			reg = src->dn_reg;
437 
438 		switch (size) {
439 		case 1:
440 			instr = DIF_INSTR_STORE(DIF_OP_STB, reg, dst->dn_reg);
441 			break;
442 		case 2:
443 			instr = DIF_INSTR_STORE(DIF_OP_STH, reg, dst->dn_reg);
444 			break;
445 		case 4:
446 			instr = DIF_INSTR_STORE(DIF_OP_STW, reg, dst->dn_reg);
447 			break;
448 		case 8:
449 			instr = DIF_INSTR_STORE(DIF_OP_STX, reg, dst->dn_reg);
450 			break;
451 		default:
452 			xyerror(D_UNKNOWN, "internal error -- cg cannot store "
453 			    "size %lu when passed by value\n", (ulong_t)size);
454 		}
455 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
456 
457 		if (dst->dn_flags & DT_NF_BITFIELD)
458 			dt_regset_free(drp, reg);
459 	}
460 }
461 
462 /*
463  * Generate code for a typecast or for argument promotion from the type of the
464  * actual to the type of the formal.  We need to generate code for casts when
465  * a scalar type is being narrowed or changing signed-ness.  We first shift the
466  * desired bits high (losing excess bits if narrowing) and then shift them down
467  * using logical shift (unsigned result) or arithmetic shift (signed result).
468  */
469 static void
470 dt_cg_typecast(const dt_node_t *src, const dt_node_t *dst,
471     dt_irlist_t *dlp, dt_regset_t *drp)
472 {
473 	size_t srcsize = dt_node_type_size(src);
474 	size_t dstsize = dt_node_type_size(dst);
475 
476 	dif_instr_t instr;
477 	int reg, n;
478 
479 	if (dt_node_is_scalar(dst) && (dstsize < srcsize ||
480 	    (src->dn_flags & DT_NF_SIGNED) ^ (dst->dn_flags & DT_NF_SIGNED))) {
481 		if ((reg = dt_regset_alloc(drp)) == -1)
482 			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
483 
484 		if (dstsize < srcsize)
485 			n = sizeof (uint64_t) * NBBY - dstsize * NBBY;
486 		else
487 			n = sizeof (uint64_t) * NBBY - srcsize * NBBY;
488 
489 		dt_cg_setx(dlp, reg, n);
490 
491 		instr = DIF_INSTR_FMT(DIF_OP_SLL,
492 		    src->dn_reg, reg, dst->dn_reg);
493 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
494 
495 		instr = DIF_INSTR_FMT((dst->dn_flags & DT_NF_SIGNED) ?
496 		    DIF_OP_SRA : DIF_OP_SRL, dst->dn_reg, reg, dst->dn_reg);
497 
498 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
499 		dt_regset_free(drp, reg);
500 	}
501 }
502 
503 /*
504  * Generate code to push the specified argument list on to the tuple stack.
505  * We use this routine for handling subroutine calls and associative arrays.
506  * We must first generate code for all subexpressions before loading the stack
507  * because any subexpression could itself require the use of the tuple stack.
508  * This holds a number of registers equal to the number of arguments, but this
509  * is not a huge problem because the number of arguments can't exceed the
510  * number of tuple register stack elements anyway.  At most one extra register
511  * is required (either by dt_cg_typecast() or for dtdt_size, below).  This
512  * implies that a DIF implementation should offer a number of general purpose
513  * registers at least one greater than the number of tuple registers.
514  */
515 static void
516 dt_cg_arglist(dt_ident_t *idp, dt_node_t *args,
517     dt_irlist_t *dlp, dt_regset_t *drp)
518 {
519 	const dt_idsig_t *isp = idp->di_data;
520 	dt_node_t *dnp;
521 	int i = 0;
522 
523 	for (dnp = args; dnp != NULL; dnp = dnp->dn_list)
524 		dt_cg_node(dnp, dlp, drp);
525 
526 	dt_irlist_append(dlp,
527 	    dt_cg_node_alloc(DT_LBL_NONE, DIF_INSTR_FLUSHTS));
528 
529 	for (dnp = args; dnp != NULL; dnp = dnp->dn_list, i++) {
530 		dtrace_diftype_t t;
531 		dif_instr_t instr;
532 		uint_t op;
533 		int reg;
534 
535 		dt_node_diftype(yypcb->pcb_hdl, dnp, &t);
536 
537 		isp->dis_args[i].dn_reg = dnp->dn_reg; /* re-use register */
538 		dt_cg_typecast(dnp, &isp->dis_args[i], dlp, drp);
539 		isp->dis_args[i].dn_reg = -1;
540 
541 		if (t.dtdt_flags & DIF_TF_BYREF)
542 			op = DIF_OP_PUSHTR;
543 		else
544 			op = DIF_OP_PUSHTV;
545 
546 		if (t.dtdt_size != 0) {
547 			if ((reg = dt_regset_alloc(drp)) == -1)
548 				longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
549 			dt_cg_setx(dlp, reg, t.dtdt_size);
550 		} else
551 			reg = DIF_REG_R0;
552 
553 		instr = DIF_INSTR_PUSHTS(op, t.dtdt_kind, reg, dnp->dn_reg);
554 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
555 		dt_regset_free(drp, dnp->dn_reg);
556 
557 		if (reg != DIF_REG_R0)
558 			dt_regset_free(drp, reg);
559 	}
560 
561 	if (i > yypcb->pcb_hdl->dt_conf.dtc_diftupregs)
562 		longjmp(yypcb->pcb_jmpbuf, EDT_NOTUPREG);
563 }
564 
565 static void
566 dt_cg_arithmetic_op(dt_node_t *dnp, dt_irlist_t *dlp,
567     dt_regset_t *drp, uint_t op)
568 {
569 	int is_ptr_op = (dnp->dn_op == DT_TOK_ADD || dnp->dn_op == DT_TOK_SUB ||
570 	    dnp->dn_op == DT_TOK_ADD_EQ || dnp->dn_op == DT_TOK_SUB_EQ);
571 
572 	int lp_is_ptr = dt_node_is_pointer(dnp->dn_left);
573 	int rp_is_ptr = dt_node_is_pointer(dnp->dn_right);
574 
575 	dif_instr_t instr;
576 
577 	if (lp_is_ptr && rp_is_ptr) {
578 		assert(dnp->dn_op == DT_TOK_SUB);
579 		is_ptr_op = 0;
580 	}
581 
582 	dt_cg_node(dnp->dn_left, dlp, drp);
583 	if (is_ptr_op && rp_is_ptr)
584 		dt_cg_ptrsize(dnp, dlp, drp, DIF_OP_MUL, dnp->dn_left->dn_reg);
585 
586 	dt_cg_node(dnp->dn_right, dlp, drp);
587 	if (is_ptr_op && lp_is_ptr)
588 		dt_cg_ptrsize(dnp, dlp, drp, DIF_OP_MUL, dnp->dn_right->dn_reg);
589 
590 	instr = DIF_INSTR_FMT(op, dnp->dn_left->dn_reg,
591 	    dnp->dn_right->dn_reg, dnp->dn_left->dn_reg);
592 
593 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
594 	dt_regset_free(drp, dnp->dn_right->dn_reg);
595 	dnp->dn_reg = dnp->dn_left->dn_reg;
596 
597 	if (lp_is_ptr && rp_is_ptr)
598 		dt_cg_ptrsize(dnp->dn_right,
599 		    dlp, drp, DIF_OP_UDIV, dnp->dn_reg);
600 }
601 
602 static uint_t
603 dt_cg_stvar(const dt_ident_t *idp)
604 {
605 	static const uint_t aops[] = { DIF_OP_STGAA, DIF_OP_STTAA, DIF_OP_NOP };
606 	static const uint_t sops[] = { DIF_OP_STGS, DIF_OP_STTS, DIF_OP_STLS };
607 
608 	uint_t i = (((idp->di_flags & DT_IDFLG_LOCAL) != 0) << 1) |
609 	    ((idp->di_flags & DT_IDFLG_TLS) != 0);
610 
611 	return (idp->di_kind == DT_IDENT_ARRAY ? aops[i] : sops[i]);
612 }
613 
614 static void
615 dt_cg_prearith_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp, uint_t op)
616 {
617 	ctf_file_t *ctfp = dnp->dn_ctfp;
618 	dif_instr_t instr;
619 	ctf_id_t type;
620 	ssize_t size = 1;
621 	int reg;
622 
623 	if (dt_node_is_pointer(dnp)) {
624 		type = ctf_type_resolve(ctfp, dnp->dn_type);
625 		assert(ctf_type_kind(ctfp, type) == CTF_K_POINTER);
626 		size = ctf_type_size(ctfp, ctf_type_reference(ctfp, type));
627 	}
628 
629 	dt_cg_node(dnp->dn_child, dlp, drp);
630 	dnp->dn_reg = dnp->dn_child->dn_reg;
631 
632 	if ((reg = dt_regset_alloc(drp)) == -1)
633 		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
634 
635 	dt_cg_setx(dlp, reg, size);
636 
637 	instr = DIF_INSTR_FMT(op, dnp->dn_reg, reg, dnp->dn_reg);
638 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
639 	dt_regset_free(drp, reg);
640 
641 	/*
642 	 * If we are modifying a variable, generate an stv instruction from
643 	 * the variable specified by the identifier.  If we are storing to a
644 	 * memory address, generate code again for the left-hand side using
645 	 * DT_NF_REF to get the address, and then generate a store to it.
646 	 * In both paths, we store the value in dnp->dn_reg (the new value).
647 	 */
648 	if (dnp->dn_child->dn_kind == DT_NODE_VAR) {
649 		dt_ident_t *idp = dt_ident_resolve(dnp->dn_child->dn_ident);
650 
651 		idp->di_flags |= DT_IDFLG_DIFW;
652 		instr = DIF_INSTR_STV(dt_cg_stvar(idp),
653 		    idp->di_id, dnp->dn_reg);
654 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
655 	} else {
656 		uint_t rbit = dnp->dn_child->dn_flags & DT_NF_REF;
657 
658 		assert(dnp->dn_child->dn_flags & DT_NF_WRITABLE);
659 		assert(dnp->dn_child->dn_flags & DT_NF_LVALUE);
660 
661 		dnp->dn_child->dn_flags |= DT_NF_REF; /* force pass-by-ref */
662 		dt_cg_node(dnp->dn_child, dlp, drp);
663 
664 		dt_cg_store(dnp, dlp, drp, dnp->dn_child);
665 		dt_regset_free(drp, dnp->dn_child->dn_reg);
666 
667 		dnp->dn_left->dn_flags &= ~DT_NF_REF;
668 		dnp->dn_left->dn_flags |= rbit;
669 	}
670 }
671 
672 static void
673 dt_cg_postarith_op(dt_node_t *dnp, dt_irlist_t *dlp,
674     dt_regset_t *drp, uint_t op)
675 {
676 	ctf_file_t *ctfp = dnp->dn_ctfp;
677 	dif_instr_t instr;
678 	ctf_id_t type;
679 	ssize_t size = 1;
680 	int nreg;
681 
682 	if (dt_node_is_pointer(dnp)) {
683 		type = ctf_type_resolve(ctfp, dnp->dn_type);
684 		assert(ctf_type_kind(ctfp, type) == CTF_K_POINTER);
685 		size = ctf_type_size(ctfp, ctf_type_reference(ctfp, type));
686 	}
687 
688 	dt_cg_node(dnp->dn_child, dlp, drp);
689 	dnp->dn_reg = dnp->dn_child->dn_reg;
690 
691 	if ((nreg = dt_regset_alloc(drp)) == -1)
692 		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
693 
694 	dt_cg_setx(dlp, nreg, size);
695 	instr = DIF_INSTR_FMT(op, dnp->dn_reg, nreg, nreg);
696 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
697 
698 	/*
699 	 * If we are modifying a variable, generate an stv instruction from
700 	 * the variable specified by the identifier.  If we are storing to a
701 	 * memory address, generate code again for the left-hand side using
702 	 * DT_NF_REF to get the address, and then generate a store to it.
703 	 * In both paths, we store the value from 'nreg' (the new value).
704 	 */
705 	if (dnp->dn_child->dn_kind == DT_NODE_VAR) {
706 		dt_ident_t *idp = dt_ident_resolve(dnp->dn_child->dn_ident);
707 
708 		idp->di_flags |= DT_IDFLG_DIFW;
709 		instr = DIF_INSTR_STV(dt_cg_stvar(idp), idp->di_id, nreg);
710 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
711 	} else {
712 		uint_t rbit = dnp->dn_child->dn_flags & DT_NF_REF;
713 		int oreg = dnp->dn_reg;
714 
715 		assert(dnp->dn_child->dn_flags & DT_NF_WRITABLE);
716 		assert(dnp->dn_child->dn_flags & DT_NF_LVALUE);
717 
718 		dnp->dn_child->dn_flags |= DT_NF_REF; /* force pass-by-ref */
719 		dt_cg_node(dnp->dn_child, dlp, drp);
720 
721 		dnp->dn_reg = nreg;
722 		dt_cg_store(dnp, dlp, drp, dnp->dn_child);
723 		dnp->dn_reg = oreg;
724 
725 		dt_regset_free(drp, dnp->dn_child->dn_reg);
726 		dnp->dn_left->dn_flags &= ~DT_NF_REF;
727 		dnp->dn_left->dn_flags |= rbit;
728 	}
729 
730 	dt_regset_free(drp, nreg);
731 }
732 
733 /*
734  * Determine if we should perform signed or unsigned comparison for an OP2.
735  * If both operands are of arithmetic type, perform the usual arithmetic
736  * conversions to determine the common real type for comparison [ISOC 6.5.8.3].
737  */
738 static int
739 dt_cg_compare_signed(dt_node_t *dnp)
740 {
741 	dt_node_t dn;
742 
743 	if (dt_node_is_string(dnp->dn_left) ||
744 	    dt_node_is_string(dnp->dn_right))
745 		return (1); /* strings always compare signed */
746 	else if (!dt_node_is_arith(dnp->dn_left) ||
747 	    !dt_node_is_arith(dnp->dn_right))
748 		return (0); /* non-arithmetic types always compare unsigned */
749 
750 	bzero(&dn, sizeof (dn));
751 	dt_node_promote(dnp->dn_left, dnp->dn_right, &dn);
752 	return (dn.dn_flags & DT_NF_SIGNED);
753 }
754 
755 static void
756 dt_cg_compare_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp, uint_t op)
757 {
758 	uint_t lbl_true = dt_irlist_label(dlp);
759 	uint_t lbl_post = dt_irlist_label(dlp);
760 
761 	dif_instr_t instr;
762 	uint_t opc;
763 
764 	dt_cg_node(dnp->dn_left, dlp, drp);
765 	dt_cg_node(dnp->dn_right, dlp, drp);
766 
767 	if (dt_node_is_string(dnp->dn_left) || dt_node_is_string(dnp->dn_right))
768 		opc = DIF_OP_SCMP;
769 	else
770 		opc = DIF_OP_CMP;
771 
772 	instr = DIF_INSTR_CMP(opc, dnp->dn_left->dn_reg, dnp->dn_right->dn_reg);
773 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
774 	dt_regset_free(drp, dnp->dn_right->dn_reg);
775 	dnp->dn_reg = dnp->dn_left->dn_reg;
776 
777 	instr = DIF_INSTR_BRANCH(op, lbl_true);
778 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
779 
780 	instr = DIF_INSTR_MOV(DIF_REG_R0, dnp->dn_reg);
781 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
782 
783 	instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post);
784 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
785 
786 	dt_cg_xsetx(dlp, NULL, lbl_true, dnp->dn_reg, 1);
787 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP));
788 }
789 
790 /*
791  * Code generation for the ternary op requires some trickery with the assembler
792  * in order to conserve registers.  We generate code for dn_expr and dn_left
793  * and free their registers so they do not have be consumed across codegen for
794  * dn_right.  We insert a dummy MOV at the end of dn_left into the destination
795  * register, which is not yet known because we haven't done dn_right yet, and
796  * save the pointer to this instruction node.  We then generate code for
797  * dn_right and use its register as our output.  Finally, we reach back and
798  * patch the instruction for dn_left to move its output into this register.
799  */
800 static void
801 dt_cg_ternary_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
802 {
803 	uint_t lbl_false = dt_irlist_label(dlp);
804 	uint_t lbl_post = dt_irlist_label(dlp);
805 
806 	dif_instr_t instr;
807 	dt_irnode_t *dip;
808 
809 	dt_cg_node(dnp->dn_expr, dlp, drp);
810 	instr = DIF_INSTR_TST(dnp->dn_expr->dn_reg);
811 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
812 	dt_regset_free(drp, dnp->dn_expr->dn_reg);
813 
814 	instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_false);
815 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
816 
817 	dt_cg_node(dnp->dn_left, dlp, drp);
818 	instr = DIF_INSTR_MOV(dnp->dn_left->dn_reg, DIF_REG_R0);
819 	dip = dt_cg_node_alloc(DT_LBL_NONE, instr); /* save dip for below */
820 	dt_irlist_append(dlp, dip);
821 	dt_regset_free(drp, dnp->dn_left->dn_reg);
822 
823 	instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post);
824 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
825 
826 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_false, DIF_INSTR_NOP));
827 	dt_cg_node(dnp->dn_right, dlp, drp);
828 	dnp->dn_reg = dnp->dn_right->dn_reg;
829 
830 	/*
831 	 * Now that dn_reg is assigned, reach back and patch the correct MOV
832 	 * instruction into the tail of dn_left.  We know dn_reg was unused
833 	 * at that point because otherwise dn_right couldn't have allocated it.
834 	 */
835 	dip->di_instr = DIF_INSTR_MOV(dnp->dn_left->dn_reg, dnp->dn_reg);
836 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP));
837 }
838 
839 static void
840 dt_cg_logical_and(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
841 {
842 	uint_t lbl_false = dt_irlist_label(dlp);
843 	uint_t lbl_post = dt_irlist_label(dlp);
844 
845 	dif_instr_t instr;
846 
847 	dt_cg_node(dnp->dn_left, dlp, drp);
848 	instr = DIF_INSTR_TST(dnp->dn_left->dn_reg);
849 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
850 	dt_regset_free(drp, dnp->dn_left->dn_reg);
851 
852 	instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_false);
853 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
854 
855 	dt_cg_node(dnp->dn_right, dlp, drp);
856 	instr = DIF_INSTR_TST(dnp->dn_right->dn_reg);
857 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
858 	dnp->dn_reg = dnp->dn_right->dn_reg;
859 
860 	instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_false);
861 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
862 
863 	dt_cg_setx(dlp, dnp->dn_reg, 1);
864 
865 	instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post);
866 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
867 
868 	instr = DIF_INSTR_MOV(DIF_REG_R0, dnp->dn_reg);
869 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_false, instr));
870 
871 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP));
872 }
873 
874 static void
875 dt_cg_logical_xor(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
876 {
877 	uint_t lbl_next = dt_irlist_label(dlp);
878 	uint_t lbl_tail = dt_irlist_label(dlp);
879 
880 	dif_instr_t instr;
881 
882 	dt_cg_node(dnp->dn_left, dlp, drp);
883 	instr = DIF_INSTR_TST(dnp->dn_left->dn_reg);
884 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
885 
886 	instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_next);
887 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
888 	dt_cg_setx(dlp, dnp->dn_left->dn_reg, 1);
889 
890 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_next, DIF_INSTR_NOP));
891 	dt_cg_node(dnp->dn_right, dlp, drp);
892 
893 	instr = DIF_INSTR_TST(dnp->dn_right->dn_reg);
894 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
895 
896 	instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_tail);
897 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
898 	dt_cg_setx(dlp, dnp->dn_right->dn_reg, 1);
899 
900 	instr = DIF_INSTR_FMT(DIF_OP_XOR, dnp->dn_left->dn_reg,
901 	    dnp->dn_right->dn_reg, dnp->dn_left->dn_reg);
902 
903 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_tail, instr));
904 
905 	dt_regset_free(drp, dnp->dn_right->dn_reg);
906 	dnp->dn_reg = dnp->dn_left->dn_reg;
907 }
908 
909 static void
910 dt_cg_logical_or(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
911 {
912 	uint_t lbl_true = dt_irlist_label(dlp);
913 	uint_t lbl_false = dt_irlist_label(dlp);
914 	uint_t lbl_post = dt_irlist_label(dlp);
915 
916 	dif_instr_t instr;
917 
918 	dt_cg_node(dnp->dn_left, dlp, drp);
919 	instr = DIF_INSTR_TST(dnp->dn_left->dn_reg);
920 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
921 	dt_regset_free(drp, dnp->dn_left->dn_reg);
922 
923 	instr = DIF_INSTR_BRANCH(DIF_OP_BNE, lbl_true);
924 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
925 
926 	dt_cg_node(dnp->dn_right, dlp, drp);
927 	instr = DIF_INSTR_TST(dnp->dn_right->dn_reg);
928 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
929 	dnp->dn_reg = dnp->dn_right->dn_reg;
930 
931 	instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_false);
932 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
933 
934 	dt_cg_xsetx(dlp, NULL, lbl_true, dnp->dn_reg, 1);
935 
936 	instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post);
937 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
938 
939 	instr = DIF_INSTR_MOV(DIF_REG_R0, dnp->dn_reg);
940 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_false, instr));
941 
942 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP));
943 }
944 
945 static void
946 dt_cg_logical_neg(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
947 {
948 	uint_t lbl_zero = dt_irlist_label(dlp);
949 	uint_t lbl_post = dt_irlist_label(dlp);
950 
951 	dif_instr_t instr;
952 
953 	dt_cg_node(dnp->dn_child, dlp, drp);
954 	dnp->dn_reg = dnp->dn_child->dn_reg;
955 
956 	instr = DIF_INSTR_TST(dnp->dn_reg);
957 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
958 
959 	instr = DIF_INSTR_BRANCH(DIF_OP_BE, lbl_zero);
960 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
961 
962 	instr = DIF_INSTR_MOV(DIF_REG_R0, dnp->dn_reg);
963 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
964 
965 	instr = DIF_INSTR_BRANCH(DIF_OP_BA, lbl_post);
966 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
967 
968 	dt_cg_xsetx(dlp, NULL, lbl_zero, dnp->dn_reg, 1);
969 	dt_irlist_append(dlp, dt_cg_node_alloc(lbl_post, DIF_INSTR_NOP));
970 }
971 
972 static void
973 dt_cg_asgn_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
974 {
975 	dif_instr_t instr;
976 	dt_ident_t *idp;
977 
978 	/*
979 	 * If we are performing a structure assignment of a translated type,
980 	 * we must instantiate all members and create a snapshot of the object
981 	 * in scratch space.  We allocs a chunk of memory, generate code for
982 	 * each member, and then set dnp->dn_reg to the scratch object address.
983 	 */
984 	if ((idp = dt_node_resolve(dnp->dn_right, DT_IDENT_XLSOU)) != NULL) {
985 		ctf_membinfo_t ctm;
986 		dt_xlator_t *dxp = idp->di_data;
987 		dt_node_t *mnp, dn, mn;
988 		int r1, r2;
989 
990 		/*
991 		 * Create two fake dt_node_t's representing operator "." and a
992 		 * right-hand identifier child node.  These will be repeatedly
993 		 * modified according to each instantiated member so that we
994 		 * can pass them to dt_cg_store() and effect a member store.
995 		 */
996 		bzero(&dn, sizeof (dt_node_t));
997 		dn.dn_kind = DT_NODE_OP2;
998 		dn.dn_op = DT_TOK_DOT;
999 		dn.dn_left = dnp;
1000 		dn.dn_right = &mn;
1001 
1002 		bzero(&mn, sizeof (dt_node_t));
1003 		mn.dn_kind = DT_NODE_IDENT;
1004 		mn.dn_op = DT_TOK_IDENT;
1005 
1006 		/*
1007 		 * Allocate a register for our scratch data pointer.  First we
1008 		 * set it to the size of our data structure, and then replace
1009 		 * it with the result of an allocs of the specified size.
1010 		 */
1011 		if ((r1 = dt_regset_alloc(drp)) == -1)
1012 			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
1013 
1014 		dt_cg_setx(dlp, r1,
1015 		    ctf_type_size(dxp->dx_dst_ctfp, dxp->dx_dst_base));
1016 
1017 		instr = DIF_INSTR_ALLOCS(r1, r1);
1018 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1019 
1020 		/*
1021 		 * When dt_cg_asgn_op() is called, we have already generated
1022 		 * code for dnp->dn_right, which is the translator input.  We
1023 		 * now associate this register with the translator's input
1024 		 * identifier so it can be referenced during our member loop.
1025 		 */
1026 		dxp->dx_ident->di_flags |= DT_IDFLG_CGREG;
1027 		dxp->dx_ident->di_id = dnp->dn_right->dn_reg;
1028 
1029 		for (mnp = dxp->dx_members; mnp != NULL; mnp = mnp->dn_list) {
1030 			/*
1031 			 * Generate code for the translator member expression,
1032 			 * and then cast the result to the member type.
1033 			 */
1034 			dt_cg_node(mnp->dn_membexpr, dlp, drp);
1035 			mnp->dn_reg = mnp->dn_membexpr->dn_reg;
1036 			dt_cg_typecast(mnp->dn_membexpr, mnp, dlp, drp);
1037 
1038 			/*
1039 			 * Ask CTF for the offset of the member so we can store
1040 			 * to the appropriate offset.  This call has already
1041 			 * been done once by the parser, so it should succeed.
1042 			 */
1043 			if (ctf_member_info(dxp->dx_dst_ctfp, dxp->dx_dst_base,
1044 			    mnp->dn_membname, &ctm) == CTF_ERR) {
1045 				yypcb->pcb_hdl->dt_ctferr =
1046 				    ctf_errno(dxp->dx_dst_ctfp);
1047 				longjmp(yypcb->pcb_jmpbuf, EDT_CTF);
1048 			}
1049 
1050 			/*
1051 			 * If the destination member is at offset 0, store the
1052 			 * result directly to r1 (the scratch buffer address).
1053 			 * Otherwise allocate another temporary for the offset
1054 			 * and add r1 to it before storing the result.
1055 			 */
1056 			if (ctm.ctm_offset != 0) {
1057 				if ((r2 = dt_regset_alloc(drp)) == -1)
1058 					longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
1059 
1060 				/*
1061 				 * Add the member offset rounded down to the
1062 				 * nearest byte.  If the offset was not aligned
1063 				 * on a byte boundary, this member is a bit-
1064 				 * field and dt_cg_store() will handle masking.
1065 				 */
1066 				dt_cg_setx(dlp, r2, ctm.ctm_offset / NBBY);
1067 				instr = DIF_INSTR_FMT(DIF_OP_ADD, r1, r2, r2);
1068 				dt_irlist_append(dlp,
1069 				    dt_cg_node_alloc(DT_LBL_NONE, instr));
1070 
1071 				dt_node_type_propagate(mnp, &dn);
1072 				dn.dn_right->dn_string = mnp->dn_membname;
1073 				dn.dn_reg = r2;
1074 
1075 				dt_cg_store(mnp, dlp, drp, &dn);
1076 				dt_regset_free(drp, r2);
1077 
1078 			} else {
1079 				dt_node_type_propagate(mnp, &dn);
1080 				dn.dn_right->dn_string = mnp->dn_membname;
1081 				dn.dn_reg = r1;
1082 
1083 				dt_cg_store(mnp, dlp, drp, &dn);
1084 			}
1085 
1086 			dt_regset_free(drp, mnp->dn_reg);
1087 		}
1088 
1089 		dxp->dx_ident->di_flags &= ~DT_IDFLG_CGREG;
1090 		dxp->dx_ident->di_id = 0;
1091 
1092 		if (dnp->dn_right->dn_reg != -1)
1093 			dt_regset_free(drp, dnp->dn_right->dn_reg);
1094 
1095 		assert(dnp->dn_reg == dnp->dn_right->dn_reg);
1096 		dnp->dn_reg = r1;
1097 	}
1098 
1099 	/*
1100 	 * If we are storing to a variable, generate an stv instruction from
1101 	 * the variable specified by the identifier.  If we are storing to a
1102 	 * memory address, generate code again for the left-hand side using
1103 	 * DT_NF_REF to get the address, and then generate a store to it.
1104 	 * In both paths, we assume dnp->dn_reg already has the new value.
1105 	 */
1106 	if (dnp->dn_left->dn_kind == DT_NODE_VAR) {
1107 		idp = dt_ident_resolve(dnp->dn_left->dn_ident);
1108 
1109 		if (idp->di_kind == DT_IDENT_ARRAY)
1110 			dt_cg_arglist(idp, dnp->dn_left->dn_args, dlp, drp);
1111 
1112 		idp->di_flags |= DT_IDFLG_DIFW;
1113 		instr = DIF_INSTR_STV(dt_cg_stvar(idp),
1114 		    idp->di_id, dnp->dn_reg);
1115 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1116 	} else {
1117 		uint_t rbit = dnp->dn_left->dn_flags & DT_NF_REF;
1118 
1119 		assert(dnp->dn_left->dn_flags & DT_NF_WRITABLE);
1120 		assert(dnp->dn_left->dn_flags & DT_NF_LVALUE);
1121 
1122 		dnp->dn_left->dn_flags |= DT_NF_REF; /* force pass-by-ref */
1123 
1124 		dt_cg_node(dnp->dn_left, dlp, drp);
1125 		dt_cg_store(dnp, dlp, drp, dnp->dn_left);
1126 		dt_regset_free(drp, dnp->dn_left->dn_reg);
1127 
1128 		dnp->dn_left->dn_flags &= ~DT_NF_REF;
1129 		dnp->dn_left->dn_flags |= rbit;
1130 	}
1131 }
1132 
1133 static void
1134 dt_cg_assoc_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
1135 {
1136 	dif_instr_t instr;
1137 	uint_t op;
1138 
1139 	assert(dnp->dn_kind == DT_NODE_VAR);
1140 	assert(!(dnp->dn_ident->di_flags & DT_IDFLG_LOCAL));
1141 	assert(dnp->dn_args != NULL);
1142 
1143 	dt_cg_arglist(dnp->dn_ident, dnp->dn_args, dlp, drp);
1144 
1145 	if ((dnp->dn_reg = dt_regset_alloc(drp)) == -1)
1146 		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
1147 
1148 	if (dnp->dn_ident->di_flags & DT_IDFLG_TLS)
1149 		op = DIF_OP_LDTAA;
1150 	else
1151 		op = DIF_OP_LDGAA;
1152 
1153 	dnp->dn_ident->di_flags |= DT_IDFLG_DIFR;
1154 	instr = DIF_INSTR_LDV(op, dnp->dn_ident->di_id, dnp->dn_reg);
1155 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1156 
1157 	/*
1158 	 * If the associative array is a pass-by-reference type, then we are
1159 	 * loading its value as a pointer to either load or store through it.
1160 	 * The array element in question may not have been faulted in yet, in
1161 	 * which case DIF_OP_LD*AA will return zero.  We append an epilogue
1162 	 * of instructions similar to the following:
1163 	 *
1164 	 *	  ld?aa	 id, %r1	! base ld?aa instruction above
1165 	 *	  tst	 %r1		! start of epilogue
1166 	 *   +--- bne	 label
1167 	 *   |    setx	 size, %r1
1168 	 *   |    allocs %r1, %r1
1169 	 *   |    st?aa	 id, %r1
1170 	 *   |    ld?aa	 id, %r1
1171 	 *   v
1172 	 * label: < rest of code >
1173 	 *
1174 	 * The idea is that we allocs a zero-filled chunk of scratch space and
1175 	 * do a DIF_OP_ST*AA to fault in and initialize the array element, and
1176 	 * then reload it to get the faulted-in address of the new variable
1177 	 * storage.  This isn't cheap, but pass-by-ref associative array values
1178 	 * are (thus far) uncommon and the allocs cost only occurs once.  If
1179 	 * this path becomes important to DTrace users, we can improve things
1180 	 * by adding a new DIF opcode to fault in associative array elements.
1181 	 */
1182 	if (dnp->dn_flags & DT_NF_REF) {
1183 		uint_t stvop = op == DIF_OP_LDTAA ? DIF_OP_STTAA : DIF_OP_STGAA;
1184 		uint_t label = dt_irlist_label(dlp);
1185 
1186 		instr = DIF_INSTR_TST(dnp->dn_reg);
1187 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1188 
1189 		instr = DIF_INSTR_BRANCH(DIF_OP_BNE, label);
1190 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1191 
1192 		dt_cg_setx(dlp, dnp->dn_reg, dt_node_type_size(dnp));
1193 		instr = DIF_INSTR_ALLOCS(dnp->dn_reg, dnp->dn_reg);
1194 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1195 
1196 		dnp->dn_ident->di_flags |= DT_IDFLG_DIFW;
1197 		instr = DIF_INSTR_STV(stvop, dnp->dn_ident->di_id, dnp->dn_reg);
1198 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1199 
1200 		instr = DIF_INSTR_LDV(op, dnp->dn_ident->di_id, dnp->dn_reg);
1201 		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1202 
1203 		dt_irlist_append(dlp, dt_cg_node_alloc(label, DIF_INSTR_NOP));
1204 	}
1205 }
1206 
1207 static void
1208 dt_cg_array_op(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
1209 {
1210 	dt_probe_t *prp = yypcb->pcb_probe;
1211 	uintmax_t saved = dnp->dn_args->dn_value;
1212 	dt_ident_t *idp = dnp->dn_ident;
1213 
1214 	dif_instr_t instr;
1215 	uint_t op;
1216 	size_t size;
1217 	int reg, n;
1218 
1219 	assert(dnp->dn_kind == DT_NODE_VAR);
1220 	assert(!(idp->di_flags & DT_IDFLG_LOCAL));
1221 
1222 	assert(dnp->dn_args->dn_kind == DT_NODE_INT);
1223 	assert(dnp->dn_args->dn_list == NULL);
1224 
1225 	/*
1226 	 * If this is a reference in the args[] array, temporarily modify the
1227 	 * array index according to the static argument mapping (if any),
1228 	 * unless the argument reference is provided by a dynamic translator.
1229 	 * If we're using a dynamic translator for args[], then just set dn_reg
1230 	 * to an invalid reg and return: DIF_OP_XLARG will fetch the arg later.
1231 	 */
1232 	if (idp->di_id == DIF_VAR_ARGS) {
1233 		if ((idp->di_kind == DT_IDENT_XLPTR ||
1234 		    idp->di_kind == DT_IDENT_XLSOU) &&
1235 		    dt_xlator_dynamic(idp->di_data)) {
1236 			dnp->dn_reg = -1;
1237 			return;
1238 		}
1239 		dnp->dn_args->dn_value = prp->pr_mapping[saved];
1240 	}
1241 
1242 	dt_cg_node(dnp->dn_args, dlp, drp);
1243 	dnp->dn_args->dn_value = saved;
1244 
1245 	dnp->dn_reg = dnp->dn_args->dn_reg;
1246 
1247 	if (idp->di_flags & DT_IDFLG_TLS)
1248 		op = DIF_OP_LDTA;
1249 	else
1250 		op = DIF_OP_LDGA;
1251 
1252 	idp->di_flags |= DT_IDFLG_DIFR;
1253 
1254 	instr = DIF_INSTR_LDA(op, idp->di_id,
1255 	    dnp->dn_args->dn_reg, dnp->dn_reg);
1256 
1257 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1258 
1259 	/*
1260 	 * If this is a reference to the args[] array, we need to take the
1261 	 * additional step of explicitly eliminating any bits larger than the
1262 	 * type size: the DIF interpreter in the kernel will always give us
1263 	 * the raw (64-bit) argument value, and any bits larger than the type
1264 	 * size may be junk.  As a practical matter, this arises only on 64-bit
1265 	 * architectures and only when the argument index is larger than the
1266 	 * number of arguments passed directly to DTrace: if a 8-, 16- or
1267 	 * 32-bit argument must be retrieved from the stack, it is possible
1268 	 * (and it some cases, likely) that the upper bits will be garbage.
1269 	 */
1270 	if (idp->di_id != DIF_VAR_ARGS || !dt_node_is_scalar(dnp))
1271 		return;
1272 
1273 	if ((size = dt_node_type_size(dnp)) == sizeof (uint64_t))
1274 		return;
1275 
1276 	if ((reg = dt_regset_alloc(drp)) == -1)
1277 		longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
1278 
1279 	assert(size < sizeof (uint64_t));
1280 	n = sizeof (uint64_t) * NBBY - size * NBBY;
1281 
1282 	dt_cg_setx(dlp, reg, n);
1283 
1284 	instr = DIF_INSTR_FMT(DIF_OP_SLL, dnp->dn_reg, reg, dnp->dn_reg);
1285 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1286 
1287 	instr = DIF_INSTR_FMT((dnp->dn_flags & DT_NF_SIGNED) ?
1288 	    DIF_OP_SRA : DIF_OP_SRL, dnp->dn_reg, reg, dnp->dn_reg);
1289 
1290 	dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
1291 	dt_regset_free(drp, reg);
1292 }
1293 
1294 /*
1295  * Generate code for an inlined variable reference.  Inlines can be used to
1296  * define either scalar or associative array substitutions.  For scalars, we
1297  * simply generate code for the parse tree saved in the identifier's din_root,
1298  * and then cast the resulting expression to the inline's declaration type.
1299  * For arrays, we take the input parameter subtrees from dnp->dn_args and
1300  * temporarily store them in the din_root of each din_argv[i] identifier,
1301  * which are themselves inlines and were set up for us by the parser.  The
1302  * result is that any reference to the inlined parameter inside the top-level
1303  * din_root will turn into a recursive call to dt_cg_inline() for a scalar
1304  * inline whose din_root will refer to the subtree pointed to by the argument.
1305  */
1306 static void
1307 dt_cg_inline(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
1308 {
1309 	dt_ident_t *idp = dnp->dn_ident;
1310 	dt_idnode_t *inp = idp->di_iarg;
1311 
1312 	dt_idnode_t *pinp;
1313 	dt_node_t *pnp;
1314 	int i;
1315 
1316 	assert(idp->di_flags & DT_IDFLG_INLINE);
1317 	assert(idp->di_ops == &dt_idops_inline);
1318 
1319 	if (idp->di_kind == DT_IDENT_ARRAY) {
1320 		for (i = 0, pnp = dnp->dn_args;
1321 		    pnp != NULL; pnp = pnp->dn_list, i++) {
1322 			if (inp->din_argv[i] != NULL) {
1323 				pinp = inp->din_argv[i]->di_iarg;
1324 				pinp->din_root = pnp;
1325 			}
1326 		}
1327 	}
1328 
1329 	dt_cg_node(inp->din_root, dlp, drp);
1330 	dnp->dn_reg = inp->din_root->dn_reg;
1331 	dt_cg_typecast(inp->din_root, dnp, dlp, drp);
1332 
1333 	if (idp->di_kind == DT_IDENT_ARRAY) {
1334 		for (i = 0; i < inp->din_argc; i++) {
1335 			pinp = inp->din_argv[i]->di_iarg;
1336 			pinp->din_root = NULL;
1337 		}
1338 	}
1339 }
1340 
/*
 * Generate DIF for the parse tree rooted at dnp, appending instructions to
 * dlp and taking registers from drp.  The node's token type (dnp->dn_op)
 * selects the case below; operator cases recurse into their operands first.
 * The cases implemented inline here finish by recording the register that
 * holds the node's value in dnp->dn_reg; the remaining cases hand the node to
 * a dt_cg_*() helper, which is presumably responsible for doing the same.
 *
 * Register exhaustion, string table failures, and CTF lookup failures do not
 * return to the caller: they longjmp() to yypcb->pcb_jmpbuf with an EDT_*
 * code.  An unrecognized token, or an identifier node of an unexpected kind,
 * is reported through xyerror().
 */
static void
dt_cg_node(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
{
	ctf_file_t *ctfp = dnp->dn_ctfp;
	ctf_file_t *octfp;
	ctf_membinfo_t m;
	ctf_id_t type;

	dif_instr_t instr;
	dt_ident_t *idp;
	ssize_t stroff;
	uint_t op;
	int reg;

	switch (dnp->dn_op) {
	case DT_TOK_COMMA:
		/*
		 * Generate both operands in order; the left result is
		 * discarded and the right register becomes the result.
		 */
		dt_cg_node(dnp->dn_left, dlp, drp);
		dt_regset_free(drp, dnp->dn_left->dn_reg);
		dt_cg_node(dnp->dn_right, dlp, drp);
		dnp->dn_reg = dnp->dn_right->dn_reg;
		break;

	case DT_TOK_ASGN:
		/*
		 * Generate the right-hand side first so that dn_reg holds the
		 * new value when dt_cg_asgn_op() emits the store.
		 */
		dt_cg_node(dnp->dn_right, dlp, drp);
		dnp->dn_reg = dnp->dn_right->dn_reg;
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	/*
	 * Compound assignments: each case calls dt_cg_arithmetic_op() with
	 * the corresponding DIF opcode and then dt_cg_asgn_op() to store the
	 * result.  Where signed and unsigned opcodes differ, DT_NF_SIGNED on
	 * the node selects between them.
	 */
	case DT_TOK_ADD_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_ADD);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_SUB_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_SUB);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_MUL_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_MUL);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_DIV_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp,
		    (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SDIV : DIF_OP_UDIV);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_MOD_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp,
		    (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SREM : DIF_OP_UREM);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_AND_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_AND);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_XOR_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_XOR);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_OR_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_OR);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_LSH_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_SLL);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_RSH_EQ:
		dt_cg_arithmetic_op(dnp, dlp, drp,
		    (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SRA : DIF_OP_SRL);
		dt_cg_asgn_op(dnp, dlp, drp);
		break;

	case DT_TOK_QUESTION:
		dt_cg_ternary_op(dnp, dlp, drp);
		break;

	case DT_TOK_LOR:
		dt_cg_logical_or(dnp, dlp, drp);
		break;

	case DT_TOK_LXOR:
		dt_cg_logical_xor(dnp, dlp, drp);
		break;

	case DT_TOK_LAND:
		dt_cg_logical_and(dnp, dlp, drp);
		break;

	case DT_TOK_BOR:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_OR);
		break;

	case DT_TOK_XOR:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_XOR);
		break;

	case DT_TOK_BAND:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_AND);
		break;

	/*
	 * Comparisons: dt_cg_compare_op() is passed the branch opcode that
	 * corresponds to the operator.  For the ordering operators the result
	 * of dt_cg_compare_signed() picks the signed or unsigned branch.
	 */
	case DT_TOK_EQU:
		dt_cg_compare_op(dnp, dlp, drp, DIF_OP_BE);
		break;

	case DT_TOK_NEQ:
		dt_cg_compare_op(dnp, dlp, drp, DIF_OP_BNE);
		break;

	case DT_TOK_LT:
		dt_cg_compare_op(dnp, dlp, drp,
		    dt_cg_compare_signed(dnp) ? DIF_OP_BL : DIF_OP_BLU);
		break;

	case DT_TOK_LE:
		dt_cg_compare_op(dnp, dlp, drp,
		    dt_cg_compare_signed(dnp) ? DIF_OP_BLE : DIF_OP_BLEU);
		break;

	case DT_TOK_GT:
		dt_cg_compare_op(dnp, dlp, drp,
		    dt_cg_compare_signed(dnp) ? DIF_OP_BG : DIF_OP_BGU);
		break;

	case DT_TOK_GE:
		dt_cg_compare_op(dnp, dlp, drp,
		    dt_cg_compare_signed(dnp) ? DIF_OP_BGE : DIF_OP_BGEU);
		break;

	case DT_TOK_LSH:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_SLL);
		break;

	case DT_TOK_RSH:
		dt_cg_arithmetic_op(dnp, dlp, drp,
		    (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SRA : DIF_OP_SRL);
		break;

	case DT_TOK_ADD:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_ADD);
		break;

	case DT_TOK_SUB:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_SUB);
		break;

	case DT_TOK_MUL:
		dt_cg_arithmetic_op(dnp, dlp, drp, DIF_OP_MUL);
		break;

	case DT_TOK_DIV:
		dt_cg_arithmetic_op(dnp, dlp, drp,
		    (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SDIV : DIF_OP_UDIV);
		break;

	case DT_TOK_MOD:
		dt_cg_arithmetic_op(dnp, dlp, drp,
		    (dnp->dn_flags & DT_NF_SIGNED) ? DIF_OP_SREM : DIF_OP_UREM);
		break;

	case DT_TOK_LNEG:
		dt_cg_logical_neg(dnp, dlp, drp);
		break;

	case DT_TOK_BNEG:
		/* Apply a not instruction in place to the child's register. */
		dt_cg_node(dnp->dn_child, dlp, drp);
		dnp->dn_reg = dnp->dn_child->dn_reg;
		instr = DIF_INSTR_NOT(dnp->dn_reg, dnp->dn_reg);
		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
		break;

	case DT_TOK_PREINC:
		dt_cg_prearith_op(dnp, dlp, drp, DIF_OP_ADD);
		break;

	case DT_TOK_POSTINC:
		dt_cg_postarith_op(dnp, dlp, drp, DIF_OP_ADD);
		break;

	case DT_TOK_PREDEC:
		dt_cg_prearith_op(dnp, dlp, drp, DIF_OP_SUB);
		break;

	case DT_TOK_POSTDEC:
		dt_cg_postarith_op(dnp, dlp, drp, DIF_OP_SUB);
		break;

	case DT_TOK_IPOS:
		/* No instruction of its own: reuse the child's register. */
		dt_cg_node(dnp->dn_child, dlp, drp);
		dnp->dn_reg = dnp->dn_child->dn_reg;
		break;

	case DT_TOK_INEG:
		/* Emit "sub %r0, reg, reg" on the child's register. */
		dt_cg_node(dnp->dn_child, dlp, drp);
		dnp->dn_reg = dnp->dn_child->dn_reg;

		instr = DIF_INSTR_FMT(DIF_OP_SUB, DIF_REG_R0,
		    dnp->dn_reg, dnp->dn_reg);

		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
		break;

	case DT_TOK_DEREF:
		/*
		 * The child yields the address.  Unless this node is itself
		 * pass-by-reference, emit a load through that address into
		 * the same register.
		 */
		dt_cg_node(dnp->dn_child, dlp, drp);
		dnp->dn_reg = dnp->dn_child->dn_reg;

		if (!(dnp->dn_flags & DT_NF_REF)) {
			uint_t ubit = dnp->dn_flags & DT_NF_USERLAND;

			/*
			 * Save and restore DT_NF_USERLAND across dt_cg_load():
			 * we need the sign bit from dnp and the user bit from
			 * dnp->dn_child in order to get the proper opcode.
			 */
			dnp->dn_flags |=
			    (dnp->dn_child->dn_flags & DT_NF_USERLAND);

			instr = DIF_INSTR_LOAD(dt_cg_load(dnp, ctfp,
			    dnp->dn_type), dnp->dn_reg, dnp->dn_reg);

			dnp->dn_flags &= ~DT_NF_USERLAND;
			dnp->dn_flags |= ubit;

			dt_irlist_append(dlp,
			    dt_cg_node_alloc(DT_LBL_NONE, instr));
		}
		break;

	case DT_TOK_ADDROF: {
		/*
		 * Generate the child with DT_NF_REF temporarily forced on and
		 * take its register as the result, then restore the child's
		 * original DT_NF_REF setting.
		 */
		uint_t rbit = dnp->dn_child->dn_flags & DT_NF_REF;

		dnp->dn_child->dn_flags |= DT_NF_REF; /* force pass-by-ref */
		dt_cg_node(dnp->dn_child, dlp, drp);
		dnp->dn_reg = dnp->dn_child->dn_reg;

		dnp->dn_child->dn_flags &= ~DT_NF_REF;
		dnp->dn_child->dn_flags |= rbit;
		break;
	}

	case DT_TOK_SIZEOF: {
		/*
		 * The operand is not generated at all: its size is obtained
		 * from dt_node_sizeof() and loaded as a constant.
		 */
		size_t size = dt_node_sizeof(dnp->dn_child);

		if ((dnp->dn_reg = dt_regset_alloc(drp)) == -1)
			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);

		assert(size != 0);
		dt_cg_setx(dlp, dnp->dn_reg, size);
		break;
	}

	case DT_TOK_STRINGOF:
		/* No instruction of its own: reuse the child's register. */
		dt_cg_node(dnp->dn_child, dlp, drp);
		dnp->dn_reg = dnp->dn_child->dn_reg;
		break;

	case DT_TOK_XLATE:
		/*
		 * An xlate operator appears in either an XLATOR, indicating a
		 * reference to a dynamic translator, or an OP2, indicating
		 * use of the xlate operator in the user's program.  For the
		 * dynamic case, generate an xlate opcode with a reference to
		 * the corresponding member, pre-computed for us in dn_members.
		 */
		if (dnp->dn_kind == DT_NODE_XLATOR) {
			dt_xlator_t *dxp = dnp->dn_xlator;

			assert(dxp->dx_ident->di_flags & DT_IDFLG_CGREG);
			assert(dxp->dx_ident->di_id != 0);

			if ((dnp->dn_reg = dt_regset_alloc(drp)) == -1)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);

			/*
			 * When dx_arg is -1, copy the translator input (whose
			 * register was saved in di_id) into the new register
			 * and use DIF_OP_XLATE; otherwise use DIF_OP_XLARG
			 * with no preceding copy.
			 */
			if (dxp->dx_arg == -1) {
				instr = DIF_INSTR_MOV(
				    dxp->dx_ident->di_id, dnp->dn_reg);
				dt_irlist_append(dlp,
				    dt_cg_node_alloc(DT_LBL_NONE, instr));
				op = DIF_OP_XLATE;
			} else
				op = DIF_OP_XLARG;

			instr = DIF_INSTR_XLATE(op, 0, dnp->dn_reg);
			dt_irlist_append(dlp,
			    dt_cg_node_alloc(DT_LBL_NONE, instr));

			/*
			 * Attach the member reference to the instruction just
			 * appended by way of its di_extern field.
			 */
			dlp->dl_last->di_extern = dnp->dn_xmember;
			break;
		}

		assert(dnp->dn_kind == DT_NODE_OP2);
		dt_cg_node(dnp->dn_right, dlp, drp);
		dnp->dn_reg = dnp->dn_right->dn_reg;
		break;

	case DT_TOK_LPAR:
		/*
		 * Generate the right child and convert its value to this
		 * node's type with dt_cg_typecast().
		 */
		dt_cg_node(dnp->dn_right, dlp, drp);
		dnp->dn_reg = dnp->dn_right->dn_reg;
		dt_cg_typecast(dnp->dn_right, dnp, dlp, drp);
		break;

	case DT_TOK_PTR:
	case DT_TOK_DOT:
		assert(dnp->dn_right->dn_kind == DT_NODE_IDENT);
		dt_cg_node(dnp->dn_left, dlp, drp);

		/*
		 * If the left-hand side of PTR or DOT is a dynamic variable,
		 * we expect it to be the output of a D translator.   In this
		 * case, we look up the parse tree corresponding to the member
		 * that is being accessed and run the code generator over it.
		 * We then cast the result as if by the assignment operator.
		 */
		if ((idp = dt_node_resolve(
		    dnp->dn_left, DT_IDENT_XLSOU)) != NULL ||
		    (idp = dt_node_resolve(
		    dnp->dn_left, DT_IDENT_XLPTR)) != NULL) {

			dt_xlator_t *dxp;
			dt_node_t *mnp;

			dxp = idp->di_data;
			mnp = dt_xlator_member(dxp, dnp->dn_right->dn_string);
			assert(mnp != NULL);

			/*
			 * Expose the left-hand register through the
			 * translator's input identifier while the member
			 * expression is generated, then clear it again.
			 */
			dxp->dx_ident->di_flags |= DT_IDFLG_CGREG;
			dxp->dx_ident->di_id = dnp->dn_left->dn_reg;

			dt_cg_node(mnp->dn_membexpr, dlp, drp);
			dnp->dn_reg = mnp->dn_membexpr->dn_reg;
			dt_cg_typecast(mnp->dn_membexpr, dnp, dlp, drp);

			dxp->dx_ident->di_flags &= ~DT_IDFLG_CGREG;
			dxp->dx_ident->di_id = 0;

			/* dn_reg is -1 when no register was assigned. */
			if (dnp->dn_left->dn_reg != -1)
				dt_regset_free(drp, dnp->dn_left->dn_reg);
			break;
		}

		/*
		 * Ordinary member access: resolve the type of the left-hand
		 * side (following the pointer for PTR) and look up the
		 * member's offset and type.
		 */
		ctfp = dnp->dn_left->dn_ctfp;
		type = ctf_type_resolve(ctfp, dnp->dn_left->dn_type);

		if (dnp->dn_op == DT_TOK_PTR) {
			type = ctf_type_reference(ctfp, type);
			type = ctf_type_resolve(ctfp, type);
		}

		/*
		 * octfp keeps the container passed in so that its error code
		 * can be reported if the lookup fails and returns NULL.
		 */
		if ((ctfp = dt_cg_membinfo(octfp = ctfp, type,
		    dnp->dn_right->dn_string, &m)) == NULL) {
			yypcb->pcb_hdl->dt_ctferr = ctf_errno(octfp);
			longjmp(yypcb->pcb_jmpbuf, EDT_CTF);
		}

		/* Advance the base address in place by the byte offset. */
		if (m.ctm_offset != 0) {
			if ((reg = dt_regset_alloc(drp)) == -1)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);

			/*
			 * If the offset is not aligned on a byte boundary, it
			 * is a bit-field member and we will extract the value
			 * bits below after we generate the appropriate load.
			 */
			dt_cg_setx(dlp, reg, m.ctm_offset / NBBY);

			instr = DIF_INSTR_FMT(DIF_OP_ADD,
			    dnp->dn_left->dn_reg, reg, dnp->dn_left->dn_reg);

			dt_irlist_append(dlp,
			    dt_cg_node_alloc(DT_LBL_NONE, instr));
			dt_regset_free(drp, reg);
		}

		if (!(dnp->dn_flags & DT_NF_REF)) {
			uint_t ubit = dnp->dn_flags & DT_NF_USERLAND;

			/*
			 * Save and restore DT_NF_USERLAND across dt_cg_load():
			 * we need the sign bit from dnp and the user bit from
			 * dnp->dn_left in order to get the proper opcode.
			 */
			dnp->dn_flags |=
			    (dnp->dn_left->dn_flags & DT_NF_USERLAND);

			instr = DIF_INSTR_LOAD(dt_cg_load(dnp,
			    ctfp, m.ctm_type), dnp->dn_left->dn_reg,
			    dnp->dn_left->dn_reg);

			dnp->dn_flags &= ~DT_NF_USERLAND;
			dnp->dn_flags |= ubit;

			dt_irlist_append(dlp,
			    dt_cg_node_alloc(DT_LBL_NONE, instr));

			if (dnp->dn_flags & DT_NF_BITFIELD)
				dt_cg_field_get(dnp, dlp, drp, ctfp, &m);
		}

		dnp->dn_reg = dnp->dn_left->dn_reg;
		break;

	case DT_TOK_STRING:
		/*
		 * Insert the string into the string table and emit a sets
		 * instruction that refers to it by offset.
		 */
		if ((dnp->dn_reg = dt_regset_alloc(drp)) == -1)
			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);

		assert(dnp->dn_kind == DT_NODE_STRING);
		stroff = dt_strtab_insert(yypcb->pcb_strtab, dnp->dn_string);

		if (stroff == -1L)
			longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
		if (stroff > DIF_STROFF_MAX)
			longjmp(yypcb->pcb_jmpbuf, EDT_STR2BIG);

		instr = DIF_INSTR_SETS((ulong_t)stroff, dnp->dn_reg);
		dt_irlist_append(dlp, dt_cg_node_alloc(DT_LBL_NONE, instr));
		break;

	case DT_TOK_IDENT:
		/*
		 * If the specified identifier is a variable on which we have
		 * set the code generator register flag, then this variable
		 * has already had code generated for it and saved in di_id.
		 * Allocate a new register and copy the existing value to it.
		 */
		if (dnp->dn_kind == DT_NODE_VAR &&
		    (dnp->dn_ident->di_flags & DT_IDFLG_CGREG)) {
			if ((dnp->dn_reg = dt_regset_alloc(drp)) == -1)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
			instr = DIF_INSTR_MOV(dnp->dn_ident->di_id,
			    dnp->dn_reg);
			dt_irlist_append(dlp,
			    dt_cg_node_alloc(DT_LBL_NONE, instr));
			break;
		}

		/*
		 * Identifiers can represent function calls, variable refs, or
		 * symbols.  First we check for inlined variables, and handle
		 * them by generating code for the inline parse tree.
		 */
		if (dnp->dn_kind == DT_NODE_VAR &&
		    (dnp->dn_ident->di_flags & DT_IDFLG_INLINE)) {
			dt_cg_inline(dnp, dlp, drp);
			break;
		}

		switch (dnp->dn_kind) {
		case DT_NODE_FUNC:
			/*
			 * Only identifiers of kind DT_IDENT_FUNC may be called
			 * from an expression.  NOTE(review): the code below
			 * relies on dnerror() not returning -- confirm.
			 */
			if ((idp = dnp->dn_ident)->di_kind != DT_IDENT_FUNC) {
				dnerror(dnp, D_CG_EXPR, "%s %s( ) may not be "
				    "called from a D expression (D program "
				    "context required)\n",
				    dt_idkind_name(idp->di_kind), idp->di_name);
			}

			dt_cg_arglist(dnp->dn_ident, dnp->dn_args, dlp, drp);

			if ((dnp->dn_reg = dt_regset_alloc(drp)) == -1)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);

			instr = DIF_INSTR_CALL(
			    dnp->dn_ident->di_id, dnp->dn_reg);

			dt_irlist_append(dlp,
			    dt_cg_node_alloc(DT_LBL_NONE, instr));

			break;

		case DT_NODE_VAR:
			if (dnp->dn_ident->di_kind == DT_IDENT_XLSOU ||
			    dnp->dn_ident->di_kind == DT_IDENT_XLPTR) {
				/*
				 * This can only happen if we have translated
				 * args[].  See dt_idcook_args() for details.
				 */
				assert(dnp->dn_ident->di_id == DIF_VAR_ARGS);
				dt_cg_array_op(dnp, dlp, drp);
				break;
			}

			/*
			 * Array identifiers with an id above DIF_VAR_ARRAY_MAX
			 * are handled as associative arrays; the others are
			 * handled by dt_cg_array_op().
			 */
			if (dnp->dn_ident->di_kind == DT_IDENT_ARRAY) {
				if (dnp->dn_ident->di_id > DIF_VAR_ARRAY_MAX)
					dt_cg_assoc_op(dnp, dlp, drp);
				else
					dt_cg_array_op(dnp, dlp, drp);
				break;
			}

			if ((dnp->dn_reg = dt_regset_alloc(drp)) == -1)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);

			/*
			 * Scalar variable: the load opcode depends on whether
			 * the identifier is flagged local, thread-local, or
			 * neither.
			 */
			if (dnp->dn_ident->di_flags & DT_IDFLG_LOCAL)
				op = DIF_OP_LDLS;
			else if (dnp->dn_ident->di_flags & DT_IDFLG_TLS)
				op = DIF_OP_LDTS;
			else
				op = DIF_OP_LDGS;

			dnp->dn_ident->di_flags |= DT_IDFLG_DIFR;

			instr = DIF_INSTR_LDV(op,
			    dnp->dn_ident->di_id, dnp->dn_reg);

			dt_irlist_append(dlp,
			    dt_cg_node_alloc(DT_LBL_NONE, instr));
			break;

		case DT_NODE_SYM: {
			dtrace_hdl_t *dtp = yypcb->pcb_hdl;
			dtrace_syminfo_t *sip = dnp->dn_ident->di_data;
			GElf_Sym sym;

			/*
			 * Look the symbol up by object and name to obtain its
			 * value.  NOTE(review): sym is used below even on the
			 * failure path unless xyerror() does not return --
			 * confirm.
			 */
			if (dtrace_lookup_by_name(dtp,
			    sip->dts_object, sip->dts_name, &sym, NULL) == -1) {
				xyerror(D_UNKNOWN, "cg failed for symbol %s`%s:"
				    " %s\n", sip->dts_object, sip->dts_name,
				    dtrace_errmsg(dtp, dtrace_errno(dtp)));
			}

			if ((dnp->dn_reg = dt_regset_alloc(drp)) == -1)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);

			/*
			 * Load the symbol's value (sym.st_value) as a
			 * constant, passing the identifier to dt_cg_xsetx().
			 */
			dt_cg_xsetx(dlp, dnp->dn_ident,
			    DT_LBL_NONE, dnp->dn_reg, sym.st_value);

			/* By-value references then load through it. */
			if (!(dnp->dn_flags & DT_NF_REF)) {
				instr = DIF_INSTR_LOAD(dt_cg_load(dnp, ctfp,
				    dnp->dn_type), dnp->dn_reg, dnp->dn_reg);
				dt_irlist_append(dlp,
				    dt_cg_node_alloc(DT_LBL_NONE, instr));
			}
			break;
		}

		default:
			xyerror(D_UNKNOWN, "internal error -- node type %u is "
			    "not valid for an identifier\n", dnp->dn_kind);
		}
		break;

	case DT_TOK_INT:
		/* Integer constant: load dn_value into a fresh register. */
		if ((dnp->dn_reg = dt_regset_alloc(drp)) == -1)
			longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);

		dt_cg_setx(dlp, dnp->dn_reg, dnp->dn_value);
		break;

	default:
		xyerror(D_UNKNOWN, "internal error -- token type %u is not a "
		    "valid D compilation token\n", dnp->dn_op);
	}
}
1901 
1902 void
1903 dt_cg(dt_pcb_t *pcb, dt_node_t *dnp)
1904 {
1905 	dif_instr_t instr;
1906 	dt_xlator_t *dxp;
1907 
1908 	if (pcb->pcb_regs == NULL && (pcb->pcb_regs =
1909 	    dt_regset_create(pcb->pcb_hdl->dt_conf.dtc_difintregs)) == NULL)
1910 		longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);
1911 
1912 	dt_regset_reset(pcb->pcb_regs);
1913 	(void) dt_regset_alloc(pcb->pcb_regs); /* allocate %r0 */
1914 
1915 	if (pcb->pcb_inttab != NULL)
1916 		dt_inttab_destroy(pcb->pcb_inttab);
1917 
1918 	if ((pcb->pcb_inttab = dt_inttab_create(yypcb->pcb_hdl)) == NULL)
1919 		longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);
1920 
1921 	if (pcb->pcb_strtab != NULL)
1922 		dt_strtab_destroy(pcb->pcb_strtab);
1923 
1924 	if ((pcb->pcb_strtab = dt_strtab_create(BUFSIZ)) == NULL)
1925 		longjmp(pcb->pcb_jmpbuf, EDT_NOMEM);
1926 
1927 	dt_irlist_destroy(&pcb->pcb_ir);
1928 	dt_irlist_create(&pcb->pcb_ir);
1929 
1930 	assert(pcb->pcb_dret == NULL);
1931 	pcb->pcb_dret = dnp;
1932 
1933 	if (dt_node_is_dynamic(dnp)) {
1934 		dnerror(dnp, D_CG_DYN, "expression cannot evaluate to result "
1935 		    "of dynamic type\n");
1936 	}
1937 
1938 	/*
1939 	 * If we're generating code for a translator body, assign the input
1940 	 * parameter to the first available register (i.e. caller passes %r1).
1941 	 */
1942 	if (dnp->dn_kind == DT_NODE_MEMBER) {
1943 		dxp = dnp->dn_membxlator;
1944 		dnp = dnp->dn_membexpr;
1945 
1946 		dxp->dx_ident->di_flags |= DT_IDFLG_CGREG;
1947 		dxp->dx_ident->di_id = dt_regset_alloc(pcb->pcb_regs);
1948 	}
1949 
1950 	dt_cg_node(dnp, &pcb->pcb_ir, pcb->pcb_regs);
1951 	instr = DIF_INSTR_RET(dnp->dn_reg);
1952 	dt_regset_free(pcb->pcb_regs, dnp->dn_reg);
1953 	dt_irlist_append(&pcb->pcb_ir, dt_cg_node_alloc(DT_LBL_NONE, instr));
1954 
1955 	if (dnp->dn_kind == DT_NODE_MEMBER) {
1956 		dt_regset_free(pcb->pcb_regs, dxp->dx_ident->di_id);
1957 		dxp->dx_ident->di_id = 0;
1958 		dxp->dx_ident->di_flags &= ~DT_IDFLG_CGREG;
1959 	}
1960 }
1961