Home | History | Annotate | Line # | Download | only in sljit_src
      1 /*	$NetBSD: sljitNativeSPARC_common.c,v 1.4 2019/01/20 23:14:16 alnsn Exp $	*/
      2 
      3 /*
      4  *    Stack-less Just-In-Time compiler
      5  *
      6  *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without modification, are
      9  * permitted provided that the following conditions are met:
     10  *
     11  *   1. Redistributions of source code must retain the above copyright notice, this list of
     12  *      conditions and the following disclaimer.
     13  *
     14  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     15  *      of conditions and the following disclaimer in the documentation and/or other materials
     16  *      provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     21  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     23  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     26  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
     30 {
     31 	return "SPARC" SLJIT_CPUINFO;
     32 }
     33 
     34 /* Type of a single instruction word.
     35    Instructions are 32 bits wide on both sparc-32 and sparc-64. */
     36 typedef sljit_u32 sljit_ins;
     37 
#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)

/* Issues "flush" instructions for the instruction words in [from, to).
   Returns immediately when from == to. */
static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
{
#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590
	/* Hand written variant of the C code in the #else branch. */
	__asm (
		/* if (from == to) return */
		"cmp %i0, %i1\n"
		"be .leave\n"
		"nop\n"

		/* loop until from >= to */
		".mainloop:\n"
		"flush %i0\n"
		"add %i0, 8, %i0\n"
		"cmp %i0, %i1\n"
		"bcs .mainloop\n"
		"nop\n"

		/* The comparison was done above. */
		"bne .leave\n"
		/* nop is not necessary here, since the
		   sub operation has no side effect. */
		"sub %i0, 4, %i0\n"
		"flush %i0\n"
		".leave:"
	);
#else
	if (SLJIT_UNLIKELY(from == to))
		return;

	/* A flush operates at least on a doubleword,
	   so the pointer advances by two words per step. */
	do {
		__asm__ volatile (
			"flush %0\n"
			: : "r"(from)
		);
		from += 2;
	} while (from < to);

	if (from != to)
		return;

	/* The loop stopped exactly at "to": flush the last word as well. */
	__asm__ volatile (
		"flush %0\n"
		: : "r"(to - 1)
	);
#endif
}

#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
     90 
     91 /* TMP_REG2 is not used by getput_arg */
     92 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
     93 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
     94 #define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
     95 #define TMP_LINK	(SLJIT_NUMBER_OF_REGISTERS + 5)
     96 
     97 #define TMP_FREG1	(0)
     98 #define TMP_FREG2	((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)
     99 
    100 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
    101 	0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15
    102 };
    103 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Field encoders of an instruction word.
   D, S1, S2 and DR take a register index and translate it through reg_map.
   DA, S1A and S2A take the raw field value. */
#define D(d)		(reg_map[d] << 25)
#define DA(d)		((d) << 25)
#define S1(s1)		(reg_map[s1] << 14)
#define S2(s2)		(reg_map[s2])
#define S1A(s1)		((s1) << 14)
#define S2A(s2)		(s2)
/* Bit set by IMM() next to its 13 bit immediate value. */
#define IMM_ARG		0x2000
#define DOP(op)		((op) << 5)
#define IMM(imm)	(((imm) & 0x1fff) | IMM_ARG)

#define DR(dr)		(reg_map[dr])
/* The opcode is converted to sljit_ins before shifting: the values 2 and 3
   shifted left by 30 do not fit into a 32 bit signed int, and such a shift
   is undefined behaviour in C. The resulting bit pattern is unchanged. */
#define OPC1(opcode)	((sljit_ins)(opcode) << 30)
#define OPC2(opcode)	((opcode) << 22)
#define OPC3(opcode)	((opcode) << 19)
/* Added to an instruction to make it set the flags. */
#define SET_FLAGS	OPC3(0x10)

/* Instruction templates, in alphabetical order. */
#define ADD		(OPC1(0x2) | OPC3(0x00))
#define ADDC		(OPC1(0x2) | OPC3(0x08))
#define AND		(OPC1(0x2) | OPC3(0x01))
#define ANDN		(OPC1(0x2) | OPC3(0x05))
#define CALL		(OPC1(0x1))
#define FABSS		(OPC1(0x2) | OPC3(0x34) | DOP(0x09))
#define FADDD		(OPC1(0x2) | OPC3(0x34) | DOP(0x42))
#define FADDS		(OPC1(0x2) | OPC3(0x34) | DOP(0x41))
#define FCMPD		(OPC1(0x2) | OPC3(0x35) | DOP(0x52))
#define FCMPS		(OPC1(0x2) | OPC3(0x35) | DOP(0x51))
#define FDIVD		(OPC1(0x2) | OPC3(0x34) | DOP(0x4e))
#define FDIVS		(OPC1(0x2) | OPC3(0x34) | DOP(0x4d))
#define FDTOI		(OPC1(0x2) | OPC3(0x34) | DOP(0xd2))
#define FDTOS		(OPC1(0x2) | OPC3(0x34) | DOP(0xc6))
#define FITOD		(OPC1(0x2) | OPC3(0x34) | DOP(0xc8))
#define FITOS		(OPC1(0x2) | OPC3(0x34) | DOP(0xc4))
#define FMOVS		(OPC1(0x2) | OPC3(0x34) | DOP(0x01))
#define FMULD		(OPC1(0x2) | OPC3(0x34) | DOP(0x4a))
#define FMULS		(OPC1(0x2) | OPC3(0x34) | DOP(0x49))
#define FNEGS		(OPC1(0x2) | OPC3(0x34) | DOP(0x05))
#define FSTOD		(OPC1(0x2) | OPC3(0x34) | DOP(0xc9))
#define FSTOI		(OPC1(0x2) | OPC3(0x34) | DOP(0xd1))
#define FSUBD		(OPC1(0x2) | OPC3(0x34) | DOP(0x46))
#define FSUBS		(OPC1(0x2) | OPC3(0x34) | DOP(0x45))
#define JMPL		(OPC1(0x2) | OPC3(0x38))
#define NOP		(OPC1(0x0) | OPC2(0x04))
#define OR		(OPC1(0x2) | OPC3(0x02))
#define ORN		(OPC1(0x2) | OPC3(0x06))
#define RDY		(OPC1(0x2) | OPC3(0x28) | S1A(0))
#define RESTORE		(OPC1(0x2) | OPC3(0x3d))
#define SAVE		(OPC1(0x2) | OPC3(0x3c))
#define SETHI		(OPC1(0x0) | OPC2(0x04))
#define SLL		(OPC1(0x2) | OPC3(0x25))
#define SLLX		(OPC1(0x2) | OPC3(0x25) | (1 << 12))
#define SRA		(OPC1(0x2) | OPC3(0x27))
#define SRAX		(OPC1(0x2) | OPC3(0x27) | (1 << 12))
#define SRL		(OPC1(0x2) | OPC3(0x26))
#define SRLX		(OPC1(0x2) | OPC3(0x26) | (1 << 12))
#define SUB		(OPC1(0x2) | OPC3(0x04))
#define SUBC		(OPC1(0x2) | OPC3(0x0c))
#define TA		(OPC1(0x2) | OPC3(0x3a) | (8 << 25))
#define WRY		(OPC1(0x2) | OPC3(0x30) | DA(0))
#define XOR		(OPC1(0x2) | OPC3(0x03))
#define XNOR		(OPC1(0x2) | OPC3(0x07))
    168 
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
/* Limits and mask of the 22 bit displacement field
   (see the PATCH_B handling in sljit_generate_code). */
#define MIN_DISP	(-0x200000)
#define MAX_DISP	(0x1fffff)
#define DISP_MASK	(0x3fffff)

/* Instructions which are defined only for sparc-32. */
#define BICC		(OPC1(0x0) | OPC2(0x2))
#define FBFCC		(OPC1(0x0) | OPC2(0x6))
#define SDIV		(OPC1(0x2) | OPC3(0x0f))
#define SMUL		(OPC1(0x2) | OPC3(0x0b))
#define UDIV		(OPC1(0x2) | OPC3(0x0e))
#define UMUL		(OPC1(0x2) | OPC3(0x0a))
/* Left shift of a machine word. */
#define SLL_W		SLL
#else
#define SLL_W		SLLX
#endif

/* Smallest and largest value accepted as a signed 13 bit immediate. */
#define SIMM_MIN	(-0x1000)
#define SIMM_MAX	(0x0fff)
    187 
    188 /* dest_reg is the absolute name of the register
    189    Useful for reordering instructions in the delay slot. */
    190 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot)
    191 {
    192 	sljit_ins *ptr;
    193 	SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS
    194 		|| (delay_slot & DST_INS_MASK) == MOVABLE_INS
    195 		|| (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f));
    196 	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
    197 	FAIL_IF(!ptr);
    198 	*ptr = ins;
    199 	compiler->size++;
    200 	compiler->delay_slot = delay_slot;
    201 	return SLJIT_SUCCESS;
    202 }
    203 
    204 static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
    205 {
    206 	sljit_sw diff;
    207 	sljit_uw target_addr;
    208 	sljit_ins *inst;
    209 	sljit_ins saved_inst;
    210 
    211 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
    212 		return code_ptr;
    213 
    214 	if (jump->flags & JUMP_ADDR)
    215 		target_addr = jump->u.target;
    216 	else {
    217 		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
    218 		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
    219 	}
    220 	inst = (sljit_ins*)jump->addr;
    221 
    222 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
    223 	if (jump->flags & IS_CALL) {
    224 		/* Call is always patchable on sparc 32. */
    225 		jump->flags |= PATCH_CALL;
    226 		if (jump->flags & IS_MOVABLE) {
    227 			inst[0] = inst[-1];
    228 			inst[-1] = CALL;
    229 			jump->addr -= sizeof(sljit_ins);
    230 			return inst;
    231 		}
    232 		inst[0] = CALL;
    233 		inst[1] = NOP;
    234 		return inst + 1;
    235 	}
    236 #else
    237 	/* Both calls and BPr instructions shall not pass this point. */
    238 #error "Implementation required"
    239 #endif
    240 
    241 	if (jump->flags & IS_COND)
    242 		inst--;
    243 
    244 	diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1) - executable_offset) >> 2;
    245 
    246 	if (jump->flags & IS_MOVABLE) {
    247 		if (diff <= MAX_DISP && diff >= MIN_DISP) {
    248 			jump->flags |= PATCH_B;
    249 			inst--;
    250 			if (jump->flags & IS_COND) {
    251 				saved_inst = inst[0];
    252 				inst[0] = inst[1] ^ (1 << 28);
    253 				inst[1] = saved_inst;
    254 			} else {
    255 				inst[1] = inst[0];
    256 				inst[0] = BICC | DA(0x8);
    257 			}
    258 			jump->addr = (sljit_uw)inst;
    259 			return inst + 1;
    260 		}
    261 	}
    262 
    263 	diff += sizeof(sljit_ins);
    264 
    265 	if (diff <= MAX_DISP && diff >= MIN_DISP) {
    266 		jump->flags |= PATCH_B;
    267 		if (jump->flags & IS_COND)
    268 			inst[0] ^= (1 << 28);
    269 		else
    270 			inst[0] = BICC | DA(0x8);
    271 		inst[1] = NOP;
    272 		jump->addr = (sljit_uw)inst;
    273 		return inst + 1;
    274 	}
    275 
    276 	return code_ptr;
    277 }
    278 
    279 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
    280 {
    281 	struct sljit_memory_fragment *buf;
    282 	sljit_ins *code;
    283 	sljit_ins *code_ptr;
    284 	sljit_ins *buf_ptr;
    285 	sljit_ins *buf_end;
    286 	sljit_uw word_count;
    287 	sljit_sw executable_offset;
    288 	sljit_uw addr;
    289 
    290 	struct sljit_label *label;
    291 	struct sljit_jump *jump;
    292 	struct sljit_const *const_;
    293 
    294 	CHECK_ERROR_PTR();
    295 	CHECK_PTR(check_sljit_generate_code(compiler));
    296 	reverse_buf(compiler);
    297 
    298 	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
    299 	PTR_FAIL_WITH_EXEC_IF(code);
    300 	buf = compiler->buf;
    301 
    302 	code_ptr = code;
    303 	word_count = 0;
    304 	executable_offset = SLJIT_EXEC_OFFSET(code);
    305 
    306 	label = compiler->labels;
    307 	jump = compiler->jumps;
    308 	const_ = compiler->consts;
    309 
    310 	do {
    311 		buf_ptr = (sljit_ins*)buf->memory;
    312 		buf_end = buf_ptr + (buf->used_size >> 2);
    313 		do {
    314 			*code_ptr = *buf_ptr++;
    315 			SLJIT_ASSERT(!label || label->size >= word_count);
    316 			SLJIT_ASSERT(!jump || jump->addr >= word_count);
    317 			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
    318 			/* These structures are ordered by their address. */
    319 			if (label && label->size == word_count) {
    320 				/* Just recording the address. */
    321 				label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
    322 				label->size = code_ptr - code;
    323 				label = label->next;
    324 			}
    325 			if (jump && jump->addr == word_count) {
    326 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
    327 				jump->addr = (sljit_uw)(code_ptr - 3);
    328 #else
    329 				jump->addr = (sljit_uw)(code_ptr - 6);
    330 #endif
    331 				code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
    332 				jump = jump->next;
    333 			}
    334 			if (const_ && const_->addr == word_count) {
    335 				/* Just recording the address. */
    336 				const_->addr = (sljit_uw)code_ptr;
    337 				const_ = const_->next;
    338 			}
    339 			code_ptr ++;
    340 			word_count ++;
    341 		} while (buf_ptr < buf_end);
    342 
    343 		buf = buf->next;
    344 	} while (buf);
    345 
    346 	if (label && label->size == word_count) {
    347 		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
    348 		label->size = code_ptr - code;
    349 		label = label->next;
    350 	}
    351 
    352 	SLJIT_ASSERT(!label);
    353 	SLJIT_ASSERT(!jump);
    354 	SLJIT_ASSERT(!const_);
    355 	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);
    356 
    357 	jump = compiler->jumps;
    358 	while (jump) {
    359 		do {
    360 			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
    361 			buf_ptr = (sljit_ins *)jump->addr;
    362 
    363 			if (jump->flags & PATCH_CALL) {
    364 				addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
    365 				SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000);
    366 				buf_ptr[0] = CALL | (addr & 0x3fffffff);
    367 				break;
    368 			}
    369 			if (jump->flags & PATCH_B) {
    370 				addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
    371 				SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP);
    372 				buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK);
    373 				break;
    374 			}
    375 
    376 			/* Set the fields of immediate loads. */
    377 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
    378 			buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff);
    379 			buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff);
    380 #else
    381 #error "Implementation required"
    382 #endif
    383 		} while (0);
    384 		jump = jump->next;
    385 	}
    386 
    387 
    388 	compiler->error = SLJIT_ERR_COMPILED;
    389 	compiler->executable_offset = executable_offset;
    390 	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
    391 
    392 	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
    393 	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
    394 
    395 	SLJIT_CACHE_FLUSH(code, code_ptr);
    396 	return code;
    397 }
    398 
    399 /* --------------------------------------------------------------------- */
    400 /*  Entry, exit                                                          */
    401 /* --------------------------------------------------------------------- */
    402 
/* Data transfer type. The bits covered by MEM_MASK form an index
   into the data_transfer_insts array. */
#define WORD_DATA	0x00
#define LOAD_DATA	0x01
#define BYTE_DATA	0x02
#define HALF_DATA	0x04
#define INT_DATA	0x06
#define SIGNED_DATA	0x08
/* Separates integer and floating point registers:
   indexes up to GPR_REG use integer registers. */
#define GPR_REG		0x0f
#define DOUBLE_DATA	0x10
#define SINGLE_DATA	0x12

#define MEM_MASK	0x1f

/* Flags describing the operation. */
#define WRITE_BACK	0x0020
#define ARG_TEST	0x0040
#define ALT_KEEP_CACHE	0x0080
#define CUMULATIVE_OP	0x0100
#define IMM_OP		0x0200
#define SRC2_IMM	0x0400

/* Flags describing the operands. */
#define REG_DEST	0x0800
#define REG2_SOURCE	0x1000
#define SLOW_SRC1	0x2000
#define SLOW_SRC2	0x4000
#define SLOW_DEST	0x8000

/* SET_FLAGS (0x10 << 19) also belongs here! */
    431 
    432 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
    433 #include "sljitNativeSPARC_32.c"
    434 #else
    435 #include "sljitNativeSPARC_64.c"
    436 #endif
    437 
    438 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
    439 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
    440 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    441 {
    442 	CHECK_ERROR();
    443 	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    444 	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    445 
    446 	local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
    447 	compiler->local_size = local_size;
    448 
    449 	if (local_size <= SIMM_MAX) {
    450 		FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS));
    451 	}
    452 	else {
    453 		FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size));
    454 		FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS));
    455 	}
    456 
    457 	/* Arguments are in their appropriate registers. */
    458 
    459 	return SLJIT_SUCCESS;
    460 }
    461 
    462 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
    463 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
    464 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    465 {
    466 	CHECK_ERROR();
    467 	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    468 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    469 
    470 	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
    471 	return SLJIT_SUCCESS;
    472 }
    473 
    474 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
    475 {
    476 	CHECK_ERROR();
    477 	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
    478 
    479 	if (op != SLJIT_MOV || !FAST_IS_REG(src)) {
    480 		FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
    481 		src = SLJIT_R0;
    482 	}
    483 
    484 	FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS));
    485 	return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS);
    486 }
    487 
    488 /* --------------------------------------------------------------------- */
    489 /*  Operators                                                            */
    490 /* --------------------------------------------------------------------- */
    491 
    492 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
    493 #define ARCH_32_64(a, b)	a
    494 #else
    495 #define ARCH_32_64(a, b)	b
    496 #endif
    497 
    498 static const sljit_ins data_transfer_insts[16 + 4] = {
    499 /* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
    500 /* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
    501 /* u b s */ OPC1(3) | OPC3(0x05) /* stb */,
    502 /* u b l */ OPC1(3) | OPC3(0x01) /* ldub */,
    503 /* u h s */ OPC1(3) | OPC3(0x06) /* sth */,
    504 /* u h l */ OPC1(3) | OPC3(0x02) /* lduh */,
    505 /* u i s */ OPC1(3) | OPC3(0x04) /* stw */,
    506 /* u i l */ OPC1(3) | OPC3(0x00) /* lduw */,
    507 
    508 /* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
    509 /* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
    510 /* s b s */ OPC1(3) | OPC3(0x05) /* stb */,
    511 /* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */,
    512 /* s h s */ OPC1(3) | OPC3(0x06) /* sth */,
    513 /* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */,
    514 /* s i s */ OPC1(3) | OPC3(0x04) /* stw */,
    515 /* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */),
    516 
    517 /* d   s */ OPC1(3) | OPC3(0x27),
    518 /* d   l */ OPC1(3) | OPC3(0x23),
    519 /* s   s */ OPC1(3) | OPC3(0x24),
    520 /* s   l */ OPC1(3) | OPC3(0x20),
    521 };
    522 
    523 #undef ARCH_32_64
    524 
    525 /* Can perform an operation using at most 1 instruction. */
    526 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
    527 {
    528 	SLJIT_ASSERT(arg & SLJIT_MEM);
    529 
    530 	if (!(flags & WRITE_BACK) || !(arg & REG_MASK)) {
    531 		if ((!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN)
    532 				|| ((arg & OFFS_REG_MASK) && (argw & 0x3) == 0)) {
    533 			/* Works for both absoulte and relative addresses (immediate case). */
    534 			if (SLJIT_UNLIKELY(flags & ARG_TEST))
    535 				return 1;
    536 			FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK]
    537 				| ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg))
    538 				| S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)),
    539 				((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS));
    540 			return -1;
    541 		}
    542 	}
    543 	return 0;
    544 }
    545 
    546 /* See getput_arg below.
    547    Note: can_cache is called only for binary operators. Those
    548    operators always uses word arguments without write back. */
    549 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
    550 {
    551 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
    552 
    553 	/* Simple operation except for updates. */
    554 	if (arg & OFFS_REG_MASK) {
    555 		argw &= 0x3;
    556 		SLJIT_ASSERT(argw);
    557 		next_argw &= 0x3;
    558 		if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == next_argw)
    559 			return 1;
    560 		return 0;
    561 	}
    562 
    563 	if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN))
    564 		return 1;
    565 	return 0;
    566 }
    567 
    568 /* Emit the necessary instructions. See can_cache above. */
    569 static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
    570 {
    571 	sljit_s32 base, arg2, delay_slot;
    572 	sljit_ins dest;
    573 
    574 	SLJIT_ASSERT(arg & SLJIT_MEM);
    575 	if (!(next_arg & SLJIT_MEM)) {
    576 		next_arg = 0;
    577 		next_argw = 0;
    578 	}
    579 
    580 	base = arg & REG_MASK;
    581 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
    582 		argw &= 0x3;
    583 
    584 		/* Using the cache. */
    585 		if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw))
    586 			arg2 = TMP_REG3;
    587 		else {
    588 			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
    589 				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
    590 				compiler->cache_argw = argw;
    591 				arg2 = TMP_REG3;
    592 			}
    593 			else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && reg != OFFS_REG(arg))
    594 				arg2 = reg;
    595 			else /* It must be a mov operation, so tmp1 must be free to use. */
    596 				arg2 = TMP_REG1;
    597 			FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | argw, DR(arg2)));
    598 		}
    599 	}
    600 	else {
    601 		/* Using the cache. */
    602 		if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) {
    603 			if (argw != compiler->cache_argw) {
    604 				FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
    605 				compiler->cache_argw = argw;
    606 			}
    607 			arg2 = TMP_REG3;
    608 		} else {
    609 			if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) {
    610 				compiler->cache_arg = SLJIT_MEM;
    611 				compiler->cache_argw = argw;
    612 				arg2 = TMP_REG3;
    613 			}
    614 			else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base)
    615 				arg2 = reg;
    616 			else /* It must be a mov operation, so tmp1 must be free to use. */
    617 				arg2 = TMP_REG1;
    618 			FAIL_IF(load_immediate(compiler, arg2, argw));
    619 		}
    620 	}
    621 
    622 	dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg));
    623 	delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS;
    624 	if (!base)
    625 		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot);
    626 	if (!(flags & WRITE_BACK))
    627 		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot);
    628 	FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot));
    629 	return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base));
    630 }
    631 
    632 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
    633 {
    634 	if (getput_arg_fast(compiler, flags, reg, arg, argw))
    635 		return compiler->error;
    636 	compiler->cache_arg = 0;
    637 	compiler->cache_argw = 0;
    638 	return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
    639 }
    640 
    641 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
    642 {
    643 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
    644 		return compiler->error;
    645 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
    646 }
    647 
    648 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
    649 	sljit_s32 dst, sljit_sw dstw,
    650 	sljit_s32 src1, sljit_sw src1w,
    651 	sljit_s32 src2, sljit_sw src2w)
    652 {
    653 	/* arg1 goes to TMP_REG1 or src reg
    654 	   arg2 goes to TMP_REG2, imm or src reg
    655 	   TMP_REG3 can be used for caching
    656 	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
    657 	sljit_s32 dst_r = TMP_REG2;
    658 	sljit_s32 src1_r;
    659 	sljit_sw src2_r = 0;
    660 	sljit_s32 sugg_src2_r = TMP_REG2;
    661 
    662 	if (!(flags & ALT_KEEP_CACHE)) {
    663 		compiler->cache_arg = 0;
    664 		compiler->cache_argw = 0;
    665 	}
    666 
    667 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
    668 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
    669 			return SLJIT_SUCCESS;
    670 	}
    671 	else if (FAST_IS_REG(dst)) {
    672 		dst_r = dst;
    673 		flags |= REG_DEST;
    674 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
    675 			sugg_src2_r = dst_r;
    676 	}
    677 	else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
    678 		flags |= SLOW_DEST;
    679 
    680 	if (flags & IMM_OP) {
    681 		if ((src2 & SLJIT_IMM) && src2w) {
    682 			if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
    683 				flags |= SRC2_IMM;
    684 				src2_r = src2w;
    685 			}
    686 		}
    687 		if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
    688 			if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
    689 				flags |= SRC2_IMM;
    690 				src2_r = src1w;
    691 
    692 				/* And swap arguments. */
    693 				src1 = src2;
    694 				src1w = src2w;
    695 				src2 = SLJIT_IMM;
    696 				/* src2w = src2_r unneeded. */
    697 			}
    698 		}
    699 	}
    700 
    701 	/* Source 1. */
    702 	if (FAST_IS_REG(src1))
    703 		src1_r = src1;
    704 	else if (src1 & SLJIT_IMM) {
    705 		if (src1w) {
    706 			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
    707 			src1_r = TMP_REG1;
    708 		}
    709 		else
    710 			src1_r = 0;
    711 	}
    712 	else {
    713 		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
    714 			FAIL_IF(compiler->error);
    715 		else
    716 			flags |= SLOW_SRC1;
    717 		src1_r = TMP_REG1;
    718 	}
    719 
    720 	/* Source 2. */
    721 	if (FAST_IS_REG(src2)) {
    722 		src2_r = src2;
    723 		flags |= REG2_SOURCE;
    724 		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
    725 			dst_r = src2_r;
    726 	}
    727 	else if (src2 & SLJIT_IMM) {
    728 		if (!(flags & SRC2_IMM)) {
    729 			if (src2w) {
    730 				FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
    731 				src2_r = sugg_src2_r;
    732 			}
    733 			else {
    734 				src2_r = 0;
    735 				if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM))
    736 					dst_r = 0;
    737 			}
    738 		}
    739 	}
    740 	else {
    741 		if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w))
    742 			FAIL_IF(compiler->error);
    743 		else
    744 			flags |= SLOW_SRC2;
    745 		src2_r = sugg_src2_r;
    746 	}
    747 
    748 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
    749 		SLJIT_ASSERT(src2_r == TMP_REG2);
    750 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
    751 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
    752 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
    753 		}
    754 		else {
    755 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
    756 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
    757 		}
    758 	}
    759 	else if (flags & SLOW_SRC1)
    760 		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
    761 	else if (flags & SLOW_SRC2)
    762 		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
    763 
    764 	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
    765 
    766 	if (dst & SLJIT_MEM) {
    767 		if (!(flags & SLOW_DEST)) {
    768 			getput_arg_fast(compiler, flags, dst_r, dst, dstw);
    769 			return compiler->error;
    770 		}
    771 		return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
    772 	}
    773 
    774 	return SLJIT_SUCCESS;
    775 }
    776 
    777 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
    778 {
    779 	CHECK_ERROR();
    780 	CHECK(check_sljit_emit_op0(compiler, op));
    781 
    782 	op = GET_OPCODE(op);
    783 	switch (op) {
    784 	case SLJIT_BREAKPOINT:
    785 		return push_inst(compiler, TA, UNMOVABLE_INS);
    786 	case SLJIT_NOP:
    787 		return push_inst(compiler, NOP, UNMOVABLE_INS);
    788 	case SLJIT_LMUL_UW:
    789 	case SLJIT_LMUL_SW:
    790 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
    791 		FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
    792 		return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1));
    793 #else
    794 #error "Implementation required"
    795 #endif
    796 	case SLJIT_DIVMOD_UW:
    797 	case SLJIT_DIVMOD_SW:
    798 	case SLJIT_DIV_UW:
    799 	case SLJIT_DIV_SW:
    800 		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
    801 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
    802 		if ((op | 0x2) == SLJIT_DIV_UW)
    803 			FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS));
    804 		else {
    805 			FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1)));
    806 			FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS));
    807 		}
    808 		if (op <= SLJIT_DIVMOD_SW)
    809 			FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2)));
    810 		FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
    811 		if (op >= SLJIT_DIV_UW)
    812 			return SLJIT_SUCCESS;
    813 		FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1)));
    814 		return push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1));
    815 #else
    816 #error "Implementation required"
    817 #endif
    818 	}
    819 
    820 	return SLJIT_SUCCESS;
    821 }
    822 
    823 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
    824 	sljit_s32 dst, sljit_sw dstw,
    825 	sljit_s32 src, sljit_sw srcw)
    826 {
    827 	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
    828 
    829 	CHECK_ERROR();
    830 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
    831 	ADJUST_LOCAL_OFFSET(dst, dstw);
    832 	ADJUST_LOCAL_OFFSET(src, srcw);
    833 
    834 	op = GET_OPCODE(op);
    835 	switch (op) {
    836 	case SLJIT_MOV:
    837 	case SLJIT_MOV_P:
    838 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
    839 
    840 	case SLJIT_MOV_U32:
    841 		return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
    842 
    843 	case SLJIT_MOV_S32:
    844 		return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
    845 
    846 	case SLJIT_MOV_U8:
    847 		return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
    848 
    849 	case SLJIT_MOV_S8:
    850 		return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
    851 
    852 	case SLJIT_MOV_U16:
    853 		return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
    854 
    855 	case SLJIT_MOV_S16:
    856 		return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
    857 
    858 	case SLJIT_MOVU:
    859 	case SLJIT_MOVU_P:
    860 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
    861 
    862 	case SLJIT_MOVU_U32:
    863 		return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
    864 
    865 	case SLJIT_MOVU_S32:
    866 		return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
    867 
    868 	case SLJIT_MOVU_U8:
    869 		return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
    870 
    871 	case SLJIT_MOVU_S8:
    872 		return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
    873 
    874 	case SLJIT_MOVU_U16:
    875 		return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
    876 
    877 	case SLJIT_MOVU_S16:
    878 		return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
    879 
    880 	case SLJIT_NOT:
    881 	case SLJIT_CLZ:
    882 		return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
    883 
    884 	case SLJIT_NEG:
    885 		return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
    886 	}
    887 
    888 	return SLJIT_SUCCESS;
    889 }
    890 
    891 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
    892 	sljit_s32 dst, sljit_sw dstw,
    893 	sljit_s32 src1, sljit_sw src1w,
    894 	sljit_s32 src2, sljit_sw src2w)
    895 {
    896 	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
    897 
    898 	CHECK_ERROR();
    899 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
    900 	ADJUST_LOCAL_OFFSET(dst, dstw);
    901 	ADJUST_LOCAL_OFFSET(src1, src1w);
    902 	ADJUST_LOCAL_OFFSET(src2, src2w);
    903 
    904 	op = GET_OPCODE(op);
    905 	switch (op) {
    906 	case SLJIT_ADD:
    907 	case SLJIT_ADDC:
    908 	case SLJIT_MUL:
    909 	case SLJIT_AND:
    910 	case SLJIT_OR:
    911 	case SLJIT_XOR:
    912 		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
    913 
    914 	case SLJIT_SUB:
    915 	case SLJIT_SUBC:
    916 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
    917 
    918 	case SLJIT_SHL:
    919 	case SLJIT_LSHR:
    920 	case SLJIT_ASHR:
    921 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
    922 		if (src2 & SLJIT_IMM)
    923 			src2w &= 0x1f;
    924 #else
    925 		SLJIT_UNREACHABLE();
    926 #endif
    927 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
    928 	}
    929 
    930 	return SLJIT_SUCCESS;
    931 }
    932 
    933 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
    934 {
    935 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
    936 	return reg_map[reg];
    937 }
    938 
    939 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
    940 {
    941 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
    942 	return reg << 1;
    943 }
    944 
    945 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
    946 	void *instruction, sljit_s32 size)
    947 {
    948 	CHECK_ERROR();
    949 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
    950 
    951 	return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS);
    952 }
    953 
    954 /* --------------------------------------------------------------------- */
    955 /*  Floating point operators                                             */
    956 /* --------------------------------------------------------------------- */
    957 
    958 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
    959 {
    960 #ifdef SLJIT_IS_FPU_AVAILABLE
    961 	return SLJIT_IS_FPU_AVAILABLE;
    962 #else
    963 	/* Available by default. */
    964 	return 1;
    965 #endif
    966 }
    967 
/* FLOAT_DATA(op): DOUBLE_DATA, combined with the SLJIT_F32_OP bit of op
   shifted right by 7 when op is a single precision operation.
   SELECT_FOP(op, single, double): picks `single` when op carries
   SLJIT_F32_OP and `double` otherwise.
   Every macro argument is parenthesized so that an expression argument
   (e.g. `a ^ b`) is evaluated as a whole before it is masked or selected. */
#define FLOAT_DATA(op) (DOUBLE_DATA | (((op) & SLJIT_F32_OP) >> 7))
#define SELECT_FOP(op, single, double) (((op) & SLJIT_F32_OP) ? (single) : (double))
/* Byte offset, relative to SLJIT_SP, of the scratch slot used to move values
   between integer and floating point registers. */
#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw))
    971 
    972 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
    973 	sljit_s32 dst, sljit_sw dstw,
    974 	sljit_s32 src, sljit_sw srcw)
    975 {
    976 	if (src & SLJIT_MEM) {
    977 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
    978 		src = TMP_FREG1;
    979 	}
    980 	else
    981 		src <<= 1;
    982 
    983 	FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS));
    984 
    985 	if (dst == SLJIT_UNUSED)
    986 		return SLJIT_SUCCESS;
    987 
    988 	if (FAST_IS_REG(dst)) {
    989 		FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
    990 		return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET);
    991 	}
    992 
    993 	/* Store the integer value from a VFP register. */
    994 	return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
    995 }
    996 
    997 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
    998 	sljit_s32 dst, sljit_sw dstw,
    999 	sljit_s32 src, sljit_sw srcw)
   1000 {
   1001 	sljit_s32 dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
   1002 
   1003 	if (src & SLJIT_IMM) {
   1004 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1005 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
   1006 			srcw = (sljit_s32)srcw;
   1007 #endif
   1008 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
   1009 		src = TMP_REG1;
   1010 		srcw = 0;
   1011 	}
   1012 
   1013 	if (FAST_IS_REG(src)) {
   1014 		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
   1015 		src = SLJIT_MEM1(SLJIT_SP);
   1016 		srcw = FLOAT_TMP_MEM_OFFSET;
   1017 	}
   1018 
   1019 	FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
   1020 	FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS));
   1021 
   1022 	if (dst & SLJIT_MEM)
   1023 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
   1024 	return SLJIT_SUCCESS;
   1025 }
   1026 
   1027 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
   1028 	sljit_s32 src1, sljit_sw src1w,
   1029 	sljit_s32 src2, sljit_sw src2w)
   1030 {
   1031 	if (src1 & SLJIT_MEM) {
   1032 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
   1033 		src1 = TMP_FREG1;
   1034 	}
   1035 	else
   1036 		src1 <<= 1;
   1037 
   1038 	if (src2 & SLJIT_MEM) {
   1039 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
   1040 		src2 = TMP_FREG2;
   1041 	}
   1042 	else
   1043 		src2 <<= 1;
   1044 
   1045 	return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS);
   1046 }
   1047 
   1048 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
   1049 	sljit_s32 dst, sljit_sw dstw,
   1050 	sljit_s32 src, sljit_sw srcw)
   1051 {
   1052 	sljit_s32 dst_r;
   1053 
   1054 	CHECK_ERROR();
   1055 	compiler->cache_arg = 0;
   1056 	compiler->cache_argw = 0;
   1057 
   1058 	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
   1059 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
   1060 
   1061 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
   1062 		op ^= SLJIT_F32_OP;
   1063 
   1064 	dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
   1065 
   1066 	if (src & SLJIT_MEM) {
   1067 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
   1068 		src = dst_r;
   1069 	}
   1070 	else
   1071 		src <<= 1;
   1072 
   1073 	switch (GET_OPCODE(op)) {
   1074 	case SLJIT_MOV_F64:
   1075 		if (src != dst_r) {
   1076 			if (dst_r != TMP_FREG1) {
   1077 				FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS));
   1078 				if (!(op & SLJIT_F32_OP))
   1079 					FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
   1080 			}
   1081 			else
   1082 				dst_r = src;
   1083 		}
   1084 		break;
   1085 	case SLJIT_NEG_F64:
   1086 		FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS));
   1087 		if (dst_r != src && !(op & SLJIT_F32_OP))
   1088 			FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
   1089 		break;
   1090 	case SLJIT_ABS_F64:
   1091 		FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS));
   1092 		if (dst_r != src && !(op & SLJIT_F32_OP))
   1093 			FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
   1094 		break;
   1095 	case SLJIT_CONV_F64_FROM_F32:
   1096 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS));
   1097 		op ^= SLJIT_F32_OP;
   1098 		break;
   1099 	}
   1100 
   1101 	if (dst & SLJIT_MEM)
   1102 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
   1103 	return SLJIT_SUCCESS;
   1104 }
   1105 
   1106 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
   1107 	sljit_s32 dst, sljit_sw dstw,
   1108 	sljit_s32 src1, sljit_sw src1w,
   1109 	sljit_s32 src2, sljit_sw src2w)
   1110 {
   1111 	sljit_s32 dst_r, flags = 0;
   1112 
   1113 	CHECK_ERROR();
   1114 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
   1115 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1116 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1117 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1118 
   1119 	compiler->cache_arg = 0;
   1120 	compiler->cache_argw = 0;
   1121 
   1122 	dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
   1123 
   1124 	if (src1 & SLJIT_MEM) {
   1125 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
   1126 			FAIL_IF(compiler->error);
   1127 			src1 = TMP_FREG1;
   1128 		} else
   1129 			flags |= SLOW_SRC1;
   1130 	}
   1131 	else
   1132 		src1 <<= 1;
   1133 
   1134 	if (src2 & SLJIT_MEM) {
   1135 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
   1136 			FAIL_IF(compiler->error);
   1137 			src2 = TMP_FREG2;
   1138 		} else
   1139 			flags |= SLOW_SRC2;
   1140 	}
   1141 	else
   1142 		src2 <<= 1;
   1143 
   1144 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
   1145 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
   1146 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
   1147 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
   1148 		}
   1149 		else {
   1150 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
   1151 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
   1152 		}
   1153 	}
   1154 	else if (flags & SLOW_SRC1)
   1155 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
   1156 	else if (flags & SLOW_SRC2)
   1157 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
   1158 
   1159 	if (flags & SLOW_SRC1)
   1160 		src1 = TMP_FREG1;
   1161 	if (flags & SLOW_SRC2)
   1162 		src2 = TMP_FREG2;
   1163 
   1164 	switch (GET_OPCODE(op)) {
   1165 	case SLJIT_ADD_F64:
   1166 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
   1167 		break;
   1168 
   1169 	case SLJIT_SUB_F64:
   1170 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
   1171 		break;
   1172 
   1173 	case SLJIT_MUL_F64:
   1174 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
   1175 		break;
   1176 
   1177 	case SLJIT_DIV_F64:
   1178 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
   1179 		break;
   1180 	}
   1181 
   1182 	if (dst_r == TMP_FREG2)
   1183 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
   1184 
   1185 	return SLJIT_SUCCESS;
   1186 }
   1187 
   1188 #undef FLOAT_DATA
   1189 #undef SELECT_FOP
   1190 
   1191 /* --------------------------------------------------------------------- */
   1192 /*  Other instructions                                                   */
   1193 /* --------------------------------------------------------------------- */
   1194 
   1195 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
   1196 {
   1197 	CHECK_ERROR();
   1198 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
   1199 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1200 
   1201 	/* For UNUSED dst. Uncommon, but possible. */
   1202 	if (dst == SLJIT_UNUSED)
   1203 		return SLJIT_SUCCESS;
   1204 
   1205 	if (FAST_IS_REG(dst))
   1206 		return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), DR(dst));
   1207 
   1208 	/* Memory. */
   1209 	return emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw);
   1210 }
   1211 
   1212 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
   1213 {
   1214 	CHECK_ERROR();
   1215 	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
   1216 	ADJUST_LOCAL_OFFSET(src, srcw);
   1217 
   1218 	if (FAST_IS_REG(src))
   1219 		FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK)));
   1220 	else if (src & SLJIT_MEM)
   1221 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw));
   1222 	else if (src & SLJIT_IMM)
   1223 		FAIL_IF(load_immediate(compiler, TMP_LINK, srcw));
   1224 
   1225 	FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS));
   1226 	return push_inst(compiler, NOP, UNMOVABLE_INS);
   1227 }
   1228 
   1229 /* --------------------------------------------------------------------- */
   1230 /*  Conditional instructions                                             */
   1231 /* --------------------------------------------------------------------- */
   1232 
   1233 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   1234 {
   1235 	struct sljit_label *label;
   1236 
   1237 	CHECK_ERROR_PTR();
   1238 	CHECK_PTR(check_sljit_emit_label(compiler));
   1239 
   1240 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   1241 		return compiler->last_label;
   1242 
   1243 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   1244 	PTR_FAIL_IF(!label);
   1245 	set_label(label, compiler);
   1246 	compiler->delay_slot = UNMOVABLE_INS;
   1247 	return label;
   1248 }
   1249 
   1250 static sljit_ins get_cc(sljit_s32 type)
   1251 {
   1252 	switch (type) {
   1253 	case SLJIT_EQUAL:
   1254 	case SLJIT_MUL_NOT_OVERFLOW:
   1255 	case SLJIT_NOT_EQUAL_F64: /* Unordered. */
   1256 		return DA(0x1);
   1257 
   1258 	case SLJIT_NOT_EQUAL:
   1259 	case SLJIT_MUL_OVERFLOW:
   1260 	case SLJIT_EQUAL_F64:
   1261 		return DA(0x9);
   1262 
   1263 	case SLJIT_LESS:
   1264 	case SLJIT_GREATER_F64: /* Unordered. */
   1265 		return DA(0x5);
   1266 
   1267 	case SLJIT_GREATER_EQUAL:
   1268 	case SLJIT_LESS_EQUAL_F64:
   1269 		return DA(0xd);
   1270 
   1271 	case SLJIT_GREATER:
   1272 	case SLJIT_GREATER_EQUAL_F64: /* Unordered. */
   1273 		return DA(0xc);
   1274 
   1275 	case SLJIT_LESS_EQUAL:
   1276 	case SLJIT_LESS_F64:
   1277 		return DA(0x4);
   1278 
   1279 	case SLJIT_SIG_LESS:
   1280 		return DA(0x3);
   1281 
   1282 	case SLJIT_SIG_GREATER_EQUAL:
   1283 		return DA(0xb);
   1284 
   1285 	case SLJIT_SIG_GREATER:
   1286 		return DA(0xa);
   1287 
   1288 	case SLJIT_SIG_LESS_EQUAL:
   1289 		return DA(0x2);
   1290 
   1291 	case SLJIT_OVERFLOW:
   1292 	case SLJIT_UNORDERED_F64:
   1293 		return DA(0x7);
   1294 
   1295 	case SLJIT_NOT_OVERFLOW:
   1296 	case SLJIT_ORDERED_F64:
   1297 		return DA(0xf);
   1298 
   1299 	default:
   1300 		SLJIT_UNREACHABLE();
   1301 		return DA(0x8);
   1302 	}
   1303 }
   1304 
   1305 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
   1306 {
   1307 	struct sljit_jump *jump;
   1308 
   1309 	CHECK_ERROR_PTR();
   1310 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
   1311 
   1312 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   1313 	PTR_FAIL_IF(!jump);
   1314 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
   1315 	type &= 0xff;
   1316 
   1317 	if (type < SLJIT_EQUAL_F64) {
   1318 		jump->flags |= IS_COND;
   1319 		if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET))
   1320 			jump->flags |= IS_MOVABLE;
   1321 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
   1322 		PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS));
   1323 #else
   1324 #error "Implementation required"
   1325 #endif
   1326 	}
   1327 	else if (type < SLJIT_JUMP) {
   1328 		jump->flags |= IS_COND;
   1329 		if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET))
   1330 			jump->flags |= IS_MOVABLE;
   1331 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
   1332 		PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS));
   1333 #else
   1334 #error "Implementation required"
   1335 #endif
   1336 	} else {
   1337 		if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
   1338 			jump->flags |= IS_MOVABLE;
   1339 		if (type >= SLJIT_FAST_CALL)
   1340 			jump->flags |= IS_CALL;
   1341 	}
   1342 
   1343 	PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
   1344 	PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS));
   1345 	jump->addr = compiler->size;
   1346 	PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
   1347 
   1348 	return jump;
   1349 }
   1350 
   1351 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
   1352 {
   1353 	struct sljit_jump *jump = NULL;
   1354 	sljit_s32 src_r;
   1355 
   1356 	CHECK_ERROR();
   1357 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
   1358 	ADJUST_LOCAL_OFFSET(src, srcw);
   1359 
   1360 	if (FAST_IS_REG(src))
   1361 		src_r = src;
   1362 	else if (src & SLJIT_IMM) {
   1363 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   1364 		FAIL_IF(!jump);
   1365 		set_jump(jump, compiler, JUMP_ADDR);
   1366 		jump->u.target = srcw;
   1367 		if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
   1368 			jump->flags |= IS_MOVABLE;
   1369 		if (type >= SLJIT_FAST_CALL)
   1370 			jump->flags |= IS_CALL;
   1371 
   1372 		FAIL_IF(emit_const(compiler, TMP_REG2, 0));
   1373 		src_r = TMP_REG2;
   1374 	}
   1375 	else {
   1376 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
   1377 		src_r = TMP_REG2;
   1378 	}
   1379 
   1380 	FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS));
   1381 	if (jump)
   1382 		jump->addr = compiler->size;
   1383 	return push_inst(compiler, NOP, UNMOVABLE_INS);
   1384 }
   1385 
   1386 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
   1387 	sljit_s32 dst, sljit_sw dstw,
   1388 	sljit_s32 src, sljit_sw srcw,
   1389 	sljit_s32 type)
   1390 {
   1391 	sljit_s32 reg, flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
   1392 
   1393 	CHECK_ERROR();
   1394 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
   1395 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1396 
   1397 	if (dst == SLJIT_UNUSED)
   1398 		return SLJIT_SUCCESS;
   1399 
   1400 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
   1401 	op = GET_OPCODE(op);
   1402 	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
   1403 
   1404 	compiler->cache_arg = 0;
   1405 	compiler->cache_argw = 0;
   1406 	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
   1407 		ADJUST_LOCAL_OFFSET(src, srcw);
   1408 		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
   1409 		src = TMP_REG1;
   1410 		srcw = 0;
   1411 	}
   1412 
   1413 	type &= 0xff;
   1414 	if (type < SLJIT_EQUAL_F64)
   1415 		FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS));
   1416 	else
   1417 		FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS));
   1418 
   1419 	FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS));
   1420 	FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS));
   1421 
   1422 	if (op >= SLJIT_ADD)
   1423 		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
   1424 
   1425 	return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
   1426 #else
   1427 #error "Implementation required"
   1428 #endif
   1429 }
   1430 
   1431 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
   1432 {
   1433 	sljit_s32 reg;
   1434 	struct sljit_const *const_;
   1435 
   1436 	CHECK_ERROR_PTR();
   1437 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
   1438 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1439 
   1440 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
   1441 	PTR_FAIL_IF(!const_);
   1442 	set_const(const_, compiler);
   1443 
   1444 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
   1445 
   1446 	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
   1447 
   1448 	if (dst & SLJIT_MEM)
   1449 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
   1450 	return const_;
   1451 }
   1452